| 1 | import math |
|---|
| 2 | import functools |
|---|
| 3 | from operator import add |
|---|
| 4 | from collections import Iterable |
|---|
| 5 | |
|---|
| 6 | import numpy |
|---|
| 7 | |
|---|
| 8 | import Orange |
|---|
| 9 | from Orange import statc, corn |
|---|
| 10 | from Orange.utils import deprecated_keywords, deprecated_function_name, \ |
|---|
| 11 | deprecation_warning, environ |
|---|
| 12 | from Orange.evaluation import testing |
|---|
| 13 | |
|---|
| 14 | try: |
|---|
| 15 | import matplotlib |
|---|
| 16 | HAS_MATPLOTLIB = True |
|---|
| 17 | except ImportError: |
|---|
| 18 | matplotlib = None |
|---|
| 19 | HAS_MATPLOTLIB = False |
|---|
| 20 | |
|---|
| 21 | #### Private stuff |
|---|
| 22 | |
|---|
| 23 | def log2(x): |
|---|
| 24 | """Calculate logarithm in base 2.""" |
|---|
| 25 | return math.log(x) / math.log(2) |
|---|
| 26 | |
|---|
| 27 | def check_non_zero(x): |
|---|
| 28 | """Throw Value Error when x = 0.""" |
|---|
| 29 | if x == 0.: |
|---|
| 30 | raise ValueError, "Cannot compute the score: no examples or sum of weights is 0." |
|---|
| 31 | |
|---|
| 32 | def gettotweight(res): |
|---|
| 33 | """Sum all the weights.""" |
|---|
| 34 | totweight = reduce(lambda x, y: x + y.weight, res.results, 0) |
|---|
| 35 | if totweight == 0.: |
|---|
| 36 | raise ValueError, "Cannot compute the score: sum of weights is 0." |
|---|
| 37 | return totweight |
|---|
| 38 | |
|---|
| 39 | def gettotsize(res): |
|---|
| 40 | """Get number of result instances.""" |
|---|
| 41 | if len(res.results): |
|---|
| 42 | return len(res.results) |
|---|
| 43 | else: |
|---|
| 44 | raise ValueError, "Cannot compute the score: no examples." |
|---|
| 45 | |
|---|
| 46 | # Backward compatibility |
|---|
| 47 | def replace_use_weights(fun): |
|---|
| 48 | if environ.orange_no_deprecated_members: |
|---|
| 49 | return fun |
|---|
| 50 | |
|---|
| 51 | @functools.wraps(fun) |
|---|
| 52 | def wrapped(*args, **kwargs): |
|---|
| 53 | use_weights = kwargs.pop("useWeights", None) |
|---|
| 54 | if use_weights is not None: |
|---|
| 55 | deprecation_warning("useWeights", "ignore_weights") |
|---|
| 56 | kwargs["ignore_weights"] = not use_weights |
|---|
| 57 | return fun(*args, **kwargs) |
|---|
| 58 | return wrapped |
|---|
| 59 | |
|---|
| 60 | def replace_discrete_probabilities_with_list(method): |
|---|
| 61 | if environ.orange_no_deprecated_members: |
|---|
| 62 | return lambda fun: fun |
|---|
| 63 | |
|---|
| 64 | def decorator(fun): |
|---|
| 65 | @functools.wraps(fun) |
|---|
| 66 | def wrapped(*args, **kwargs): |
|---|
| 67 | res = args[method] if len(args)>method else kwargs.get("res", kwargs.get("test_results", None)) |
|---|
| 68 | convert = res is not None |
|---|
| 69 | |
|---|
| 70 | if convert: |
|---|
| 71 | old_probs = [] |
|---|
| 72 | for r in res.results: |
|---|
| 73 | old_probs.append(r.probabilities) |
|---|
| 74 | r.probabilities = [list(p) if type(p) is Orange.statistics.distribution.Discrete |
|---|
| 75 | else p for p in r.probabilities] |
|---|
| 76 | result = fun(*args, **kwargs) |
|---|
| 77 | if convert: |
|---|
| 78 | for r, old in zip(res.results, old_probs): |
|---|
| 79 | r.probabilities = old |
|---|
| 80 | return result |
|---|
| 81 | return wrapped |
|---|
| 82 | return decorator |
|---|
| 83 | |
|---|
| 84 | def split_by_iterations(res): |
|---|
| 85 | """Split ExperimentResults of a multiple iteratation test into a list |
|---|
| 86 | of ExperimentResults, one for each iteration. |
|---|
| 87 | """ |
|---|
| 88 | if res.number_of_iterations < 2: |
|---|
| 89 | return [res] |
|---|
| 90 | |
|---|
| 91 | ress = [Orange.evaluation.testing.ExperimentResults( |
|---|
| 92 | 1, res.classifier_names, res.class_values, |
|---|
| 93 | res.weights, classifiers=res.classifiers, |
|---|
| 94 | loaded=res.loaded, test_type=res.test_type, labels=res.labels) |
|---|
| 95 | for _ in range(res.number_of_iterations)] |
|---|
| 96 | for te in res.results: |
|---|
| 97 | ress[te.iteration_number].results.append(te) |
|---|
| 98 | return ress |
|---|
| 99 | |
|---|
| 100 | def split_by_classifiers(res): |
|---|
| 101 | """Split an instance of :obj:`ExperimentResults` into a list of |
|---|
| 102 | :obj:`ExperimentResults`, one for each classifier. |
|---|
| 103 | """ |
|---|
| 104 | split_res = [] |
|---|
| 105 | for i in range(len(res.classifierNames)): |
|---|
| 106 | r = Orange.evaluation.testing.ExperimentResults(res.numberOfIterations, |
|---|
| 107 | [res.classifierNames[i]], res.classValues, |
|---|
| 108 | weights=res.weights, base_class=res.base_class, |
|---|
| 109 | classifiers=[res.classifiers[i]] if res.classifiers else [], |
|---|
| 110 | test_type=res.test_type, labels=res.labels) |
|---|
| 111 | r.results = [] |
|---|
| 112 | for te in res.results: |
|---|
| 113 | r.results.append(Orange.evaluation.testing.TestedExample( |
|---|
| 114 | te.iterationNumber, te.actualClass, n=1, weight=te.weight)) |
|---|
| 115 | r.results[-1].classes = [te.classes[i]] |
|---|
| 116 | r.results[-1].probabilities = [te.probabilities[i]] |
|---|
| 117 | split_res.append(r) |
|---|
| 118 | return split_res |
|---|
| 119 | |
|---|
| 120 | |
|---|
| 121 | def class_probabilities_from_res(res, **argkw): |
|---|
| 122 | """Calculate class probabilities.""" |
|---|
| 123 | probs = [0.] * len(res.class_values) |
|---|
| 124 | if argkw.get("unweighted", 0) or not res.weights: |
|---|
| 125 | for tex in res.results: |
|---|
| 126 | probs[int(tex.actual_class)] += 1. |
|---|
| 127 | totweight = gettotsize(res) |
|---|
| 128 | else: |
|---|
| 129 | totweight = 0. |
|---|
| 130 | for tex in res.results: |
|---|
| 131 | probs[tex.actual_class] += tex.weight |
|---|
| 132 | totweight += tex.weight |
|---|
| 133 | check_non_zero(totweight) |
|---|
| 134 | return [prob / totweight for prob in probs] |
|---|
| 135 | |
|---|
| 136 | |
|---|
| 137 | @deprecated_keywords({ |
|---|
| 138 | "foldN": "fold_n", |
|---|
| 139 | "reportSE": "report_se", |
|---|
| 140 | "iterationIsOuter": "iteration_is_outer"}) |
|---|
| 141 | def statistics_by_folds(stats, fold_n, report_se, iteration_is_outer): |
|---|
| 142 | # remove empty folds, turn the matrix so that learner is outer |
|---|
| 143 | if iteration_is_outer: |
|---|
| 144 | if not stats: |
|---|
| 145 | raise ValueError, "Cannot compute the score: no examples or sum of weights is 0." |
|---|
| 146 | number_of_learners = len(stats[0]) |
|---|
| 147 | stats = filter(lambda (x, fN): fN > 0, zip(stats, fold_n)) |
|---|
| 148 | stats = [[x[lrn] / fN for x, fN in stats] |
|---|
| 149 | for lrn in range(number_of_learners)] |
|---|
| 150 | else: |
|---|
| 151 | stats = [[x / Fn for x, Fn in filter(lambda (x, Fn): Fn > 0, |
|---|
| 152 | zip(lrnD, fold_n))] for lrnD in stats] |
|---|
| 153 | |
|---|
| 154 | if not stats: |
|---|
| 155 | raise ValueError, "Cannot compute the score: no classifiers" |
|---|
| 156 | if not stats[0]: |
|---|
| 157 | raise ValueError, "Cannot compute the score: no examples or sum of weights is 0." |
|---|
| 158 | |
|---|
| 159 | if report_se: |
|---|
| 160 | return [(statc.mean(x), statc.sterr(x)) for x in stats] |
|---|
| 161 | else: |
|---|
| 162 | return [statc.mean(x) for x in stats] |
|---|
| 163 | |
|---|
| 164 | def ME(res, **argkw): |
|---|
| 165 | MEs = [0.] * res.number_of_learners |
|---|
| 166 | |
|---|
| 167 | if argkw.get("unweighted", 0) or not res.weights: |
|---|
| 168 | for tex in res.results: |
|---|
| 169 | MEs = map(lambda res, cls, ac=float(tex.actual_class): |
|---|
| 170 | res + abs(float(cls) - ac), MEs, tex.classes) |
|---|
| 171 | totweight = gettotsize(res) |
|---|
| 172 | else: |
|---|
| 173 | for tex in res.results: |
|---|
| 174 | MEs = map(lambda res, cls, ac=float(tex.actual_class), tw=tex.weight: |
|---|
| 175 | res + tw * abs(float(cls) - ac), MEs, tex.classes) |
|---|
| 176 | totweight = gettotweight(res) |
|---|
| 177 | |
|---|
| 178 | return [x / totweight for x in MEs] |
|---|
| 179 | |
|---|
| 180 | MAE = ME |
|---|
| 181 | |
|---|
| 182 | |
|---|
| 183 | class ConfusionMatrix: |
|---|
| 184 | """ |
|---|
| 185 | Classification result summary. |
|---|
| 186 | """ |
|---|
| 187 | #: True Positive predictions |
|---|
| 188 | TP = 0. |
|---|
| 189 | #:True Negative predictions |
|---|
| 190 | TN = 0. |
|---|
| 191 | #:False Positive predictions |
|---|
| 192 | FP = 0. |
|---|
| 193 | #: False Negative predictions |
|---|
| 194 | FN = 0. |
|---|
| 195 | |
|---|
| 196 | @deprecated_keywords({"predictedPositive": "predicted_positive", |
|---|
| 197 | "isPositive": "is_positive"}) |
|---|
| 198 | def addTFPosNeg(self, predicted_positive, is_positive, weight=1.0): |
|---|
| 199 | """ |
|---|
| 200 | Update confusion matrix with result of a single classification. |
|---|
| 201 | |
|---|
| 202 | :param predicted_positive: positive class value was predicted |
|---|
| 203 | :param is_positive: correct class value is positive |
|---|
| 204 | :param weight: weight of the selected instance |
|---|
| 205 | """ |
|---|
| 206 | if predicted_positive: |
|---|
| 207 | if is_positive: |
|---|
| 208 | self.TP += weight |
|---|
| 209 | else: |
|---|
| 210 | self.FP += weight |
|---|
| 211 | else: |
|---|
| 212 | if is_positive: |
|---|
| 213 | self.FN += weight |
|---|
| 214 | else: |
|---|
| 215 | self.TN += weight |
|---|
| 216 | |
|---|
| 217 | |
|---|
| 218 | |
|---|
| 219 | ######################################################################### |
|---|
| 220 | # PERFORMANCE MEASURES: |
|---|
| 221 | # Scores for evaluation of numeric predictions |
|---|
| 222 | |
|---|
| 223 | def check_argkw(dct, lst): |
|---|
| 224 | """Return True if any item from lst has a non-zero value in dct.""" |
|---|
| 225 | return reduce(lambda x, y: x or y, [dct.get(k, 0) for k in lst]) |
|---|
| 226 | |
|---|
| 227 | def regression_error(res, **argkw): |
|---|
| 228 | """Return the regression error (default: MSE).""" |
|---|
| 229 | if argkw.get("SE", 0) and res.number_of_iterations > 1: |
|---|
| 230 | # computes the scores for each iteration, then averages |
|---|
| 231 | scores = [[0.] * res.number_of_iterations |
|---|
| 232 | for _ in range(res.number_of_learners)] |
|---|
| 233 | norm = None |
|---|
| 234 | if argkw.get("norm-abs", 0) or argkw.get("norm-sqr", 0): |
|---|
| 235 | norm = [0.] * res.number_of_iterations |
|---|
| 236 | |
|---|
| 237 | # counts examples in each iteration |
|---|
| 238 | nIter = [0] * res.number_of_iterations |
|---|
| 239 | # average class in each iteration |
|---|
| 240 | a = [0] * res.number_of_iterations |
|---|
| 241 | for tex in res.results: |
|---|
| 242 | nIter[tex.iteration_number] += 1 |
|---|
| 243 | a[tex.iteration_number] += float(tex.actual_class) |
|---|
| 244 | a = [a[i] / nIter[i] for i in range(res.number_of_iterations)] |
|---|
| 245 | |
|---|
| 246 | if argkw.get("unweighted", 0) or not res.weights: |
|---|
| 247 | # iterate accross test cases |
|---|
| 248 | for tex in res.results: |
|---|
| 249 | ai = float(tex.actual_class) |
|---|
| 250 | nIter[tex.iteration_number] += 1 |
|---|
| 251 | |
|---|
| 252 | # compute normalization, if required |
|---|
| 253 | if argkw.get("norm-abs", 0): |
|---|
| 254 | norm[tex.iteration_number] += abs(ai - a[tex.iteration_number]) |
|---|
| 255 | elif argkw.get("norm-sqr", 0): |
|---|
| 256 | norm[tex.iteration_number] += (ai - a[tex.iteration_number]) ** 2 |
|---|
| 257 | |
|---|
| 258 | # iterate accross results of different regressors |
|---|
| 259 | for i, cls in enumerate(tex.classes): |
|---|
| 260 | if argkw.get("abs", 0): |
|---|
| 261 | scores[i][tex.iteration_number] += abs(float(cls) - ai) |
|---|
| 262 | else: |
|---|
| 263 | scores[i][tex.iteration_number] += (float(cls) - ai) ** 2 |
|---|
| 264 | else: # unweighted != 0 |
|---|
| 265 | raise NotImplementedError, "weighted error scores with SE not implemented yet" |
|---|
| 266 | |
|---|
| 267 | if argkw.get("norm-abs") or argkw.get("norm-sqr"): |
|---|
| 268 | scores = [[x / n for x, n in zip(y, norm)] for y in scores] |
|---|
| 269 | else: |
|---|
| 270 | scores = [[x / ni for x, ni in zip(y, nIter)] for y in scores] |
|---|
| 271 | |
|---|
| 272 | if argkw.get("R2"): |
|---|
| 273 | scores = [[1.0 - x for x in y] for y in scores] |
|---|
| 274 | |
|---|
| 275 | if argkw.get("sqrt", 0): |
|---|
| 276 | scores = [[math.sqrt(x) for x in y] for y in scores] |
|---|
| 277 | |
|---|
| 278 | return [(statc.mean(x), statc.std(x)) for x in scores] |
|---|
| 279 | |
|---|
| 280 | else: # single iteration (testing on a single test set) |
|---|
| 281 | scores = [0.] * res.number_of_learners |
|---|
| 282 | norm = 0. |
|---|
| 283 | |
|---|
| 284 | if argkw.get("unweighted", 0) or not res.weights: |
|---|
| 285 | a = sum([tex.actual_class for tex in res.results]) \ |
|---|
| 286 | / len(res.results) |
|---|
| 287 | for tex in res.results: |
|---|
| 288 | if argkw.get("abs", 0): |
|---|
| 289 | scores = map(lambda res, cls, ac=float(tex.actual_class): |
|---|
| 290 | res + abs(float(cls) - ac), scores, tex.classes) |
|---|
| 291 | else: |
|---|
| 292 | scores = map(lambda res, cls, ac=float(tex.actual_class): |
|---|
| 293 | res + (float(cls) - ac) ** 2, scores, tex.classes) |
|---|
| 294 | |
|---|
| 295 | if argkw.get("norm-abs", 0): |
|---|
| 296 | norm += abs(tex.actual_class - a) |
|---|
| 297 | elif argkw.get("norm-sqr", 0): |
|---|
| 298 | norm += (tex.actual_class - a) ** 2 |
|---|
| 299 | totweight = gettotsize(res) |
|---|
| 300 | else: |
|---|
| 301 | # UNFINISHED |
|---|
| 302 | MSEs = [0.] * res.number_of_learners |
|---|
| 303 | for tex in res.results: |
|---|
| 304 | MSEs = map(lambda res, cls, ac=float(tex.actual_class), |
|---|
| 305 | tw=tex.weight: |
|---|
| 306 | res + tw * (float(cls) - ac) ** 2, MSEs, tex.classes) |
|---|
| 307 | totweight = gettotweight(res) |
|---|
| 308 | |
|---|
| 309 | if argkw.get("norm-abs", 0) or argkw.get("norm-sqr", 0): |
|---|
| 310 | scores = [s / norm for s in scores] |
|---|
| 311 | else: # normalize by number of instances (or sum of weights) |
|---|
| 312 | scores = [s / totweight for s in scores] |
|---|
| 313 | |
|---|
| 314 | if argkw.get("R2"): |
|---|
| 315 | scores = [1. - s for s in scores] |
|---|
| 316 | |
|---|
| 317 | if argkw.get("sqrt", 0): |
|---|
| 318 | scores = [math.sqrt(x) for x in scores] |
|---|
| 319 | |
|---|
| 320 | return scores |
|---|
| 321 | |
|---|
| 322 | def MSE(res, **argkw): |
|---|
| 323 | """Compute mean-squared error.""" |
|---|
| 324 | return regression_error(res, **argkw) |
|---|
| 325 | |
|---|
| 326 | def RMSE(res, **argkw): |
|---|
| 327 | """Compute root mean-squared error.""" |
|---|
| 328 | argkw.setdefault("sqrt", True) |
|---|
| 329 | return regression_error(res, **argkw) |
|---|
| 330 | |
|---|
| 331 | def MAE(res, **argkw): |
|---|
| 332 | """Compute mean absolute error.""" |
|---|
| 333 | argkw.setdefault("abs", True) |
|---|
| 334 | return regression_error(res, **argkw) |
|---|
| 335 | |
|---|
| 336 | def RSE(res, **argkw): |
|---|
| 337 | """Compute relative squared error.""" |
|---|
| 338 | argkw.setdefault("norm-sqr", True) |
|---|
| 339 | return regression_error(res, **argkw) |
|---|
| 340 | |
|---|
| 341 | def RRSE(res, **argkw): |
|---|
| 342 | """Compute relative squared error.""" |
|---|
| 343 | argkw.setdefault("norm-sqr", True) |
|---|
| 344 | argkw.setdefault("sqrt", True) |
|---|
| 345 | return regression_error(res, **argkw) |
|---|
| 346 | |
|---|
| 347 | def RAE(res, **argkw): |
|---|
| 348 | """Compute relative absolute error.""" |
|---|
| 349 | argkw.setdefault("abs", True) |
|---|
| 350 | argkw.setdefault("norm-abs", True) |
|---|
| 351 | return regression_error(res, **argkw) |
|---|
| 352 | |
|---|
| 353 | def R2(res, **argkw): |
|---|
| 354 | """Compute the coefficient of determination, R-squared.""" |
|---|
| 355 | argkw.setdefault("norm-sqr", True) |
|---|
| 356 | argkw.setdefault("R2", True) |
|---|
| 357 | return regression_error(res, **argkw) |
|---|
| 358 | |
|---|
| 359 | def MSE_old(res, **argkw): |
|---|
| 360 | """Compute mean-squared error.""" |
|---|
| 361 | if argkw.get("SE", 0) and res.number_of_iterations > 1: |
|---|
| 362 | MSEs = [[0.] * res.number_of_iterations |
|---|
| 363 | for _ in range(res.number_of_learners)] |
|---|
| 364 | nIter = [0] * res.number_of_iterations |
|---|
| 365 | if argkw.get("unweighted", 0) or not res.weights: |
|---|
| 366 | for tex in res.results: |
|---|
| 367 | ac = float(tex.actual_class) |
|---|
| 368 | nIter[tex.iteration_number] += 1 |
|---|
| 369 | for i, cls in enumerate(tex.classes): |
|---|
| 370 | MSEs[i][tex.iteration_number] += (float(cls) - ac) ** 2 |
|---|
| 371 | else: |
|---|
| 372 | raise ValueError, "weighted RMSE with SE not implemented yet" |
|---|
| 373 | MSEs = [[x / ni for x, ni in zip(y, nIter)] for y in MSEs] |
|---|
| 374 | if argkw.get("sqrt", 0): |
|---|
| 375 | MSEs = [[math.sqrt(x) for x in y] for y in MSEs] |
|---|
| 376 | return [(statc.mean(x), statc.std(x)) for x in MSEs] |
|---|
| 377 | |
|---|
| 378 | else: |
|---|
| 379 | MSEs = [0.] * res.number_of_learners |
|---|
| 380 | if argkw.get("unweighted", 0) or not res.weights: |
|---|
| 381 | for tex in res.results: |
|---|
| 382 | MSEs = map(lambda res, cls, ac=float(tex.actual_class): |
|---|
| 383 | res + (float(cls) - ac) ** 2, MSEs, tex.classes) |
|---|
| 384 | totweight = gettotsize(res) |
|---|
| 385 | else: |
|---|
| 386 | for tex in res.results: |
|---|
| 387 | MSEs = map(lambda res, cls, ac=float(tex.actual_class), |
|---|
| 388 | tw=tex.weight: res + tw * (float(cls) - ac) ** 2, |
|---|
| 389 | MSEs, tex.classes) |
|---|
| 390 | totweight = gettotweight(res) |
|---|
| 391 | |
|---|
| 392 | if argkw.get("sqrt", 0): |
|---|
| 393 | MSEs = [math.sqrt(x) for x in MSEs] |
|---|
| 394 | return [x / totweight for x in MSEs] |
|---|
| 395 | |
|---|
| 396 | def RMSE_old(res, **argkw): |
|---|
| 397 | """Compute root mean-squared error.""" |
|---|
| 398 | argkw.setdefault("sqrt", 1) |
|---|
| 399 | return MSE_old(res, **argkw) |
|---|
| 400 | |
|---|
| 401 | ######################################################################### |
|---|
| 402 | # PERFORMANCE MEASURES: |
|---|
| 403 | # Scores for evaluation of classifiers |
|---|
| 404 | |
|---|
| 405 | class CA(list): |
|---|
| 406 | """ |
|---|
| 407 | Compute percentage of matches between predicted and actual class. |
|---|
| 408 | |
|---|
| 409 | :param test_results: :obj:`~Orange.evaluation.testing.ExperimentResults` |
|---|
| 410 | or list of :obj:`ConfusionMatrix`. |
|---|
| 411 | :param report_se: include standard error in result. |
|---|
| 412 | :param ignore_weights: ignore instance weights. |
|---|
| 413 | :rtype: list of scores, one for each learner. |
|---|
| 414 | |
|---|
| 415 | Standard errors are estimated from deviation of CAs across folds (if |
|---|
| 416 | test_results were produced by cross_validation) or approximated under |
|---|
| 417 | the assumption of normal distribution otherwise. |
|---|
| 418 | """ |
|---|
| 419 | CONFUSION_MATRIX = 0 |
|---|
| 420 | CONFUSION_MATRIX_LIST = 1 |
|---|
| 421 | CLASSIFICATION = 2 |
|---|
| 422 | CROSS_VALIDATION = 3 |
|---|
| 423 | |
|---|
| 424 | @deprecated_keywords({"reportSE": "report_se", |
|---|
| 425 | "unweighted": "ignore_weights"}) |
|---|
| 426 | def __init__(self, test_results, report_se=False, ignore_weights=False): |
|---|
| 427 | super(CA, self).__init__() |
|---|
| 428 | self.report_se = report_se |
|---|
| 429 | self.ignore_weights = ignore_weights |
|---|
| 430 | |
|---|
| 431 | input_type = self.get_input_type(test_results) |
|---|
| 432 | if input_type == self.CONFUSION_MATRIX: |
|---|
| 433 | self[:] = [self.from_confusion_matrix(test_results)] |
|---|
| 434 | elif input_type == self.CONFUSION_MATRIX_LIST: |
|---|
| 435 | self[:] = self.from_confusion_matrix_list(test_results) |
|---|
| 436 | elif input_type == self.CLASSIFICATION: |
|---|
| 437 | self[:] = self.from_classification_results(test_results) |
|---|
| 438 | elif input_type == self.CROSS_VALIDATION: |
|---|
| 439 | self[:] = self.from_crossvalidation_results(test_results) |
|---|
| 440 | |
|---|
| 441 | def from_confusion_matrix(self, cm): |
|---|
| 442 | all_predictions = 0. |
|---|
| 443 | correct_predictions = 0. |
|---|
| 444 | if isinstance(cm, ConfusionMatrix): |
|---|
| 445 | all_predictions += cm.TP + cm.FN + cm.FP + cm.TN |
|---|
| 446 | correct_predictions += cm.TP + cm.TN |
|---|
| 447 | else: |
|---|
| 448 | for r, row in enumerate(cm): |
|---|
| 449 | for c, column in enumerate(row): |
|---|
| 450 | if r == c: |
|---|
| 451 | correct_predictions += column |
|---|
| 452 | all_predictions += column |
|---|
| 453 | |
|---|
| 454 | check_non_zero(all_predictions) |
|---|
| 455 | ca = correct_predictions / all_predictions |
|---|
| 456 | |
|---|
| 457 | if self.report_se: |
|---|
| 458 | return ca, ca * (1 - ca) / math.sqrt(all_predictions) |
|---|
| 459 | else: |
|---|
| 460 | return ca |
|---|
| 461 | |
|---|
| 462 | def from_confusion_matrix_list(self, confusion_matrices): |
|---|
| 463 | return [self.from_confusion_matrix(cm) for cm in confusion_matrices] |
|---|
| 464 | |
|---|
| 465 | def from_classification_results(self, test_results): |
|---|
| 466 | CAs = [0.] * test_results.number_of_learners |
|---|
| 467 | totweight = 0. |
|---|
| 468 | for tex in test_results.results: |
|---|
| 469 | w = 1. if self.ignore_weights else tex.weight |
|---|
| 470 | CAs = map(lambda res, cls: res + (cls == tex.actual_class and w), |
|---|
| 471 | CAs, tex.classes) |
|---|
| 472 | totweight += w |
|---|
| 473 | check_non_zero(totweight) |
|---|
| 474 | ca = [x / totweight for x in CAs] |
|---|
| 475 | |
|---|
| 476 | if self.report_se: |
|---|
| 477 | return [(x, x * (1 - x) / math.sqrt(totweight)) for x in ca] |
|---|
| 478 | else: |
|---|
| 479 | return ca |
|---|
| 480 | |
|---|
| 481 | def from_crossvalidation_results(self, test_results): |
|---|
| 482 | CAsByFold = [[0.] * test_results.number_of_iterations |
|---|
| 483 | for _ in range(test_results.number_of_learners)] |
|---|
| 484 | foldN = [0.] * test_results.number_of_iterations |
|---|
| 485 | |
|---|
| 486 | for tex in test_results.results: |
|---|
| 487 | w = 1. if self.ignore_weights else tex.weight |
|---|
| 488 | for lrn in range(test_results.number_of_learners): |
|---|
| 489 | CAsByFold[lrn][tex.iteration_number] += (tex.classes[lrn] == |
|---|
| 490 | tex.actual_class) and w |
|---|
| 491 | foldN[tex.iteration_number] += w |
|---|
| 492 | |
|---|
| 493 | return statistics_by_folds(CAsByFold, foldN, self.report_se, False) |
|---|
| 494 | |
|---|
| 495 | def get_input_type(self, test_results): |
|---|
| 496 | if isinstance(test_results, ConfusionMatrix): |
|---|
| 497 | return self.CONFUSION_MATRIX |
|---|
| 498 | elif isinstance(test_results, testing.ExperimentResults): |
|---|
| 499 | if test_results.number_of_iterations == 1: |
|---|
| 500 | return self.CLASSIFICATION |
|---|
| 501 | else: |
|---|
| 502 | return self.CROSS_VALIDATION |
|---|
| 503 | elif isinstance(test_results, list): |
|---|
| 504 | return self.CONFUSION_MATRIX_LIST |
|---|
| 505 | |
|---|
| 506 | |
|---|
| 507 | @deprecated_keywords({"reportSE": "report_se", |
|---|
| 508 | "unweighted": "ignore_weights"}) |
|---|
| 509 | def AP(res, report_se=False, ignore_weights=False, **argkw): |
|---|
| 510 | """Compute the average probability assigned to the correct class.""" |
|---|
| 511 | if res.number_of_iterations == 1: |
|---|
| 512 | APs = [0.] * res.number_of_learners |
|---|
| 513 | if ignore_weights or not res.weights: |
|---|
| 514 | for tex in res.results: |
|---|
| 515 | APs = map(lambda res, probs: res + probs[tex.actual_class], |
|---|
| 516 | APs, tex.probabilities) |
|---|
| 517 | totweight = gettotsize(res) |
|---|
| 518 | else: |
|---|
| 519 | totweight = 0. |
|---|
| 520 | for tex in res.results: |
|---|
| 521 | APs = map(lambda res, probs: res + probs[tex.actual_class] * |
|---|
| 522 | tex.weight, APs, tex.probabilities) |
|---|
| 523 | totweight += tex.weight |
|---|
| 524 | check_non_zero(totweight) |
|---|
| 525 | return [AP / totweight for AP in APs] |
|---|
| 526 | |
|---|
| 527 | APsByFold = [[0.] * res.number_of_learners |
|---|
| 528 | for _ in range(res.number_of_iterations)] |
|---|
| 529 | foldN = [0.] * res.number_of_iterations |
|---|
| 530 | if ignore_weights or not res.weights: |
|---|
| 531 | for tex in res.results: |
|---|
| 532 | APsByFold[tex.iteration_number] = map(lambda res, probs: |
|---|
| 533 | res + probs[tex.actual_class], |
|---|
| 534 | APsByFold[tex.iteration_number], tex.probabilities) |
|---|
| 535 | foldN[tex.iteration_number] += 1 |
|---|
| 536 | else: |
|---|
| 537 | for tex in res.results: |
|---|
| 538 | APsByFold[tex.iteration_number] = map(lambda res, probs: |
|---|
| 539 | res + probs[tex.actual_class] * tex.weight, |
|---|
| 540 | APsByFold[tex.iteration_number], tex.probabilities) |
|---|
| 541 | foldN[tex.iteration_number] += tex.weight |
|---|
| 542 | |
|---|
| 543 | return statistics_by_folds(APsByFold, foldN, report_se, True) |
|---|
| 544 | |
|---|
| 545 | |
|---|
| 546 | @deprecated_keywords({"reportSE": "report_se", |
|---|
| 547 | "unweighted": "ignore_weights"}) |
|---|
| 548 | def Brier_score(res, report_se=False, ignore_weights=False, **argkw): |
|---|
| 549 | """Compute the Brier score, defined as the average (over test instances) |
|---|
| 550 | of :math:`\sum_x(t(x) - p(x))^2`, where x is a class value, t(x) is 1 for |
|---|
| 551 | the actual class value and 0 otherwise, and p(x) is the predicted |
|---|
| 552 | probability of x. |
|---|
| 553 | """ |
|---|
| 554 | # Computes an average (over examples) of sum_x(t(x) - p(x))^2, where |
|---|
| 555 | # x is class, |
|---|
| 556 | # t(x) is 0 for 'wrong' and 1 for 'correct' class |
|---|
| 557 | # p(x) is predicted probabilty. |
|---|
| 558 | # There's a trick: since t(x) is zero for all classes but the |
|---|
| 559 | # correct one (c), we compute the sum as sum_x(p(x)^2) - 2*p(c) + 1 |
|---|
| 560 | # Since +1 is there for each example, it adds 1 to the average |
|---|
| 561 | # We skip the +1 inside the sum and add it just at the end of the function |
|---|
| 562 | # We take max(result, 0) to avoid -0.0000x due to rounding errors |
|---|
| 563 | |
|---|
| 564 | if res.number_of_iterations == 1: |
|---|
| 565 | MSEs = [0.] * res.number_of_learners |
|---|
| 566 | if ignore_weights or not res.weights: |
|---|
| 567 | totweight = 0. |
|---|
| 568 | for tex in res.results: |
|---|
| 569 | MSEs = map(lambda res, probs: res + reduce( |
|---|
| 570 | lambda s, pi: s + pi ** 2, probs, 0) - |
|---|
| 571 | 2 * probs[tex.actual_class], MSEs, tex.probabilities) |
|---|
| 572 | totweight += tex.weight |
|---|
| 573 | else: |
|---|
| 574 | for tex in res.results: |
|---|
| 575 | MSEs = map(lambda res, probs: res + tex.weight * reduce( |
|---|
| 576 | lambda s, pi: s + pi ** 2, probs, 0) - |
|---|
| 577 | 2 * probs[tex.actual_class], MSEs, tex.probabilities) |
|---|
| 578 | totweight = gettotweight(res) |
|---|
| 579 | check_non_zero(totweight) |
|---|
| 580 | if report_se: |
|---|
| 581 | ## change this, not zero!!! |
|---|
| 582 | return [(max(x / totweight + 1., 0), 0) for x in MSEs] |
|---|
| 583 | else: |
|---|
| 584 | return [max(x / totweight + 1., 0) for x in MSEs] |
|---|
| 585 | |
|---|
| 586 | BSs = [[0.] * res.number_of_learners |
|---|
| 587 | for _ in range(res.number_of_iterations)] |
|---|
| 588 | foldN = [0.] * res.number_of_iterations |
|---|
| 589 | |
|---|
| 590 | if ignore_weights or not res.weights: |
|---|
| 591 | for tex in res.results: |
|---|
| 592 | BSs[tex.iteration_number] = map(lambda rr, probs: rr + reduce( |
|---|
| 593 | lambda s, pi: s + pi ** 2, probs, 0) - |
|---|
| 594 | 2 * probs[tex.actual_class], BSs[tex.iteration_number], |
|---|
| 595 | tex.probabilities) |
|---|
| 596 | foldN[tex.iteration_number] += 1 |
|---|
| 597 | else: |
|---|
| 598 | for tex in res.results: |
|---|
| 599 | BSs[tex.iteration_number] = map(lambda res, probs: |
|---|
| 600 | res + tex.weight * reduce(lambda s, pi: s + pi ** 2, probs, 0) - |
|---|
| 601 | 2 * probs[tex. actual_class], BSs[tex.iteration_number], |
|---|
| 602 | tex.probabilities) |
|---|
| 603 | foldN[tex.iteration_number] += tex.weight |
|---|
| 604 | |
|---|
| 605 | stats = statistics_by_folds(BSs, foldN, report_se, True) |
|---|
| 606 | if report_se: |
|---|
| 607 | return [(x + 1., y) for x, y in stats] |
|---|
| 608 | else: |
|---|
| 609 | return [x + 1. for x in stats] |
|---|
| 610 | |
|---|
| 611 | def BSS(res, **argkw): |
|---|
| 612 | return [1 - x / 2 for x in apply(Brier_score, (res,), argkw)] |
|---|
| 613 | |
|---|
| 614 | def IS_ex(Pc, P): |
|---|
| 615 | """Pc aposterior probability, P aprior""" |
|---|
| 616 | if Pc >= P: |
|---|
| 617 | return -log2(P) + log2(Pc) |
|---|
| 618 | else: |
|---|
| 619 | return -(-log2(1 - P) + log2(1 - Pc)) |
|---|
| 620 | |
|---|
| 621 | |
|---|
| 622 | @deprecated_keywords({"reportSE": "report_se"}) |
|---|
| 623 | def IS(res, apriori=None, report_se=False, **argkw): |
|---|
| 624 | """Compute the information score as defined by |
|---|
| 625 | `Kononenko and Bratko (1991) \ |
|---|
| 626 | <http://www.springerlink.com/content/g5p7473160476612/>`_. |
|---|
| 627 | Argument :obj:`apriori` gives the apriori class |
|---|
| 628 | distribution; if it is omitted, the class distribution is computed from |
|---|
| 629 | the actual classes of examples in :obj:`res`. |
|---|
| 630 | """ |
|---|
| 631 | if not apriori: |
|---|
| 632 | apriori = class_probabilities_from_res(res) |
|---|
| 633 | |
|---|
| 634 | if res.number_of_iterations == 1: |
|---|
| 635 | ISs = [0.] * res.number_of_learners |
|---|
| 636 | if argkw.get("unweighted", 0) or not res.weights: |
|---|
| 637 | for tex in res.results: |
|---|
| 638 | for i in range(len(tex.probabilities)): |
|---|
| 639 | cls = tex.actual_class |
|---|
| 640 | ISs[i] += IS_ex(tex.probabilities[i][cls], apriori[cls]) |
|---|
| 641 | totweight = gettotsize(res) |
|---|
| 642 | else: |
|---|
| 643 | for tex in res.results: |
|---|
| 644 | for i in range(len(tex.probabilities)): |
|---|
| 645 | cls = tex.actual_class |
|---|
| 646 | ISs[i] += (IS_ex(tex.probabilities[i][cls], apriori[cls]) * |
|---|
| 647 | tex.weight) |
|---|
| 648 | totweight = gettotweight(res) |
|---|
| 649 | if report_se: |
|---|
| 650 | return [(IS / totweight, 0) for IS in ISs] |
|---|
| 651 | else: |
|---|
| 652 | return [IS / totweight for IS in ISs] |
|---|
| 653 | |
|---|
| 654 | |
|---|
| 655 | ISs = [[0.] * res.number_of_iterations |
|---|
| 656 | for _ in range(res.number_of_learners)] |
|---|
| 657 | foldN = [0.] * res.number_of_iterations |
|---|
| 658 | |
|---|
| 659 | # compute info scores for each fold |
|---|
| 660 | if argkw.get("unweighted", 0) or not res.weights: |
|---|
| 661 | for tex in res.results: |
|---|
| 662 | for i in range(len(tex.probabilities)): |
|---|
| 663 | cls = tex.actual_class |
|---|
| 664 | ISs[i][tex.iteration_number] += IS_ex(tex.probabilities[i][cls], |
|---|
| 665 | apriori[cls]) |
|---|
| 666 | foldN[tex.iteration_number] += 1 |
|---|
| 667 | else: |
|---|
| 668 | for tex in res.results: |
|---|
| 669 | for i in range(len(tex.probabilities)): |
|---|
| 670 | cls = tex.actual_class |
|---|
| 671 | ISs[i][tex.iteration_number] += IS_ex(tex.probabilities[i][cls], |
|---|
| 672 | apriori[cls]) * tex.weight |
|---|
| 673 | foldN[tex.iteration_number] += tex.weight |
|---|
| 674 | |
|---|
| 675 | return statistics_by_folds(ISs, foldN, report_se, False) |
|---|
| 676 | |
|---|
| 677 | |
|---|
| 678 | def Wilcoxon(res, statistics, **argkw): |
|---|
| 679 | res1, res2 = [], [] |
|---|
| 680 | for ri in split_by_iterations(res): |
|---|
| 681 | stats = statistics(ri, **argkw) |
|---|
| 682 | if len(stats) != 2: |
|---|
| 683 | raise TypeError, "Wilcoxon compares two classifiers, no more, no less" |
|---|
| 684 | res1.append(stats[0]) |
|---|
| 685 | res2.append(stats[1]) |
|---|
| 686 | return statc.wilcoxont(res1, res2) |
|---|
| 687 | |
|---|
| 688 | def rank_difference(res, statistics, **argkw): |
|---|
| 689 | if not res.results: |
|---|
| 690 | raise TypeError, "no experiments" |
|---|
| 691 | |
|---|
| 692 | k = len(res.results[0].classes) |
|---|
| 693 | if k < 2: |
|---|
| 694 | raise TypeError, "nothing to compare (less than two classifiers given)" |
|---|
| 695 | if k == 2: |
|---|
| 696 | return apply(Wilcoxon, (res, statistics), argkw) |
|---|
| 697 | else: |
|---|
| 698 | return apply(Friedman, (res, statistics), argkw) |
|---|
| 699 | |
|---|
| 700 | |
|---|
| 701 | @deprecated_keywords({"res": "test_results", |
|---|
| 702 | "classIndex": "class_index", |
|---|
| 703 | "unweighted": "ignore_weights"}) |
|---|
| 704 | def confusion_matrices(test_results, class_index= -1, |
|---|
| 705 | ignore_weights=False, cutoff=.5): |
|---|
| 706 | """ |
|---|
| 707 | Return confusion matrices for test_results. |
|---|
| 708 | |
|---|
| 709 | :param test_results: test results |
|---|
| 710 | :param class_index: index of class value for which the confusion matrices |
|---|
| 711 | are to be computed (by default unspecified - see note below). |
|---|
| 712 | :param ignore_weights: ignore instance weights. |
|---|
| 713 | :param cutoff: cutoff for probability |
|---|
| 714 | |
|---|
| 715 | :rtype: list of :obj:`ConfusionMatrix` or list-of-list-of-lists (see |
|---|
| 716 | note below) |
|---|
| 717 | |
|---|
| 718 | .. note:: If `class_index` is not specified and `test_results` |
|---|
| 719 | contain predictions for multi-class problem, then the return |
|---|
| 720 | value is a list of 2D tables (list-of-lists) of all class |
|---|
| 721 | value pairwise misclassifications. |
|---|
| 722 | |
|---|
| 723 | """ |
|---|
| 724 | tfpns = [ConfusionMatrix() for _ in range(test_results.number_of_learners)] |
|---|
| 725 | |
|---|
| 726 | if class_index < 0: |
|---|
| 727 | numberOfClasses = len(test_results.class_values) |
|---|
| 728 | if class_index < -1 or numberOfClasses > 2: |
|---|
| 729 | cm = [[[0.] * numberOfClasses for _ in range(numberOfClasses)] |
|---|
| 730 | for _ in range(test_results.number_of_learners)] |
|---|
| 731 | if ignore_weights or not test_results.weights: |
|---|
| 732 | for tex in test_results.results: |
|---|
| 733 | trueClass = int(tex.actual_class) |
|---|
| 734 | for li, pred in enumerate(tex.classes): |
|---|
| 735 | predClass = int(pred) |
|---|
| 736 | if predClass < numberOfClasses: |
|---|
| 737 | cm[li][trueClass][predClass] += 1 |
|---|
| 738 | else: |
|---|
| 739 | for tex in test_results.results: |
|---|
| 740 | trueClass = int(tex.actual_class) |
|---|
| 741 | for li, pred in tex.classes: |
|---|
| 742 | predClass = int(pred) |
|---|
| 743 | if predClass < numberOfClasses: |
|---|
| 744 | cm[li][trueClass][predClass] += tex.weight |
|---|
| 745 | return cm |
|---|
| 746 | |
|---|
| 747 | elif test_results.base_class >= 0: |
|---|
| 748 | class_index = test_results.base_class |
|---|
| 749 | else: |
|---|
| 750 | class_index = 1 |
|---|
| 751 | |
|---|
| 752 | if cutoff != .5: |
|---|
| 753 | if ignore_weights or not test_results.weights: |
|---|
| 754 | for lr in test_results.results: |
|---|
| 755 | isPositive = (lr.actual_class == class_index) |
|---|
| 756 | for i in range(test_results.number_of_learners): |
|---|
| 757 | tfpns[i].addTFPosNeg(lr.probabilities[i][class_index] > |
|---|
| 758 | cutoff, isPositive) |
|---|
| 759 | else: |
|---|
| 760 | for lr in test_results.results: |
|---|
| 761 | isPositive = (lr.actual_class == class_index) |
|---|
| 762 | for i in range(test_results.number_of_learners): |
|---|
| 763 | tfpns[i].addTFPosNeg(lr.probabilities[i][class_index] > |
|---|
| 764 | cutoff, isPositive, lr.weight) |
|---|
| 765 | else: |
|---|
| 766 | if ignore_weights or not test_results.weights: |
|---|
| 767 | for lr in test_results.results: |
|---|
| 768 | isPositive = (lr.actual_class == class_index) |
|---|
| 769 | for i in range(test_results.number_of_learners): |
|---|
| 770 | tfpns[i].addTFPosNeg(lr.classes[i] == class_index, |
|---|
| 771 | isPositive) |
|---|
| 772 | else: |
|---|
| 773 | for lr in test_results.results: |
|---|
| 774 | isPositive = (lr.actual_class == class_index) |
|---|
| 775 | for i in range(test_results.number_of_learners): |
|---|
| 776 | tfpns[i].addTFPosNeg(lr.classes[i] == class_index, |
|---|
| 777 | isPositive, lr.weight) |
|---|
| 778 | return tfpns |
|---|
| 779 | |
|---|
| 780 | |
|---|
| 781 | # obsolete (renamed) |
|---|
| 782 | compute_confusion_matrices = confusion_matrices |
|---|
| 783 | |
|---|
| 784 | |
|---|
| 785 | @deprecated_keywords({"confusionMatrix": "confusion_matrix"}) |
|---|
| 786 | def confusion_chi_square(confusion_matrix): |
|---|
| 787 | """ |
|---|
| 788 | Return chi square statistic of the confusion matrix |
|---|
| 789 | (higher value indicates that prediction is not by chance). |
|---|
| 790 | """ |
|---|
| 791 | if isinstance(confusion_matrix, ConfusionMatrix) or \ |
|---|
| 792 | not isinstance(confusion_matrix[1], list): |
|---|
| 793 | return _confusion_chi_square(confusion_matrix) |
|---|
| 794 | else: |
|---|
| 795 | return map(_confusion_chi_square, confusion_matrix) |
|---|
| 796 | |
|---|
| 797 | def _confusion_chi_square(confusion_matrix): |
|---|
| 798 | if isinstance(confusion_matrix, ConfusionMatrix): |
|---|
| 799 | c = confusion_matrix |
|---|
| 800 | confusion_matrix = [[c.TP, c.FN], [c.FP, c.TN]] |
|---|
| 801 | dim = len(confusion_matrix) |
|---|
| 802 | rowPriors = [sum(r) for r in confusion_matrix] |
|---|
| 803 | colPriors = [sum(r[i] for r in confusion_matrix) for i in range(dim)] |
|---|
| 804 | total = sum(rowPriors) |
|---|
| 805 | rowPriors = [r / total for r in rowPriors] |
|---|
| 806 | colPriors = [r / total for r in colPriors] |
|---|
| 807 | ss = 0 |
|---|
| 808 | for ri, row in enumerate(confusion_matrix): |
|---|
| 809 | for ci, o in enumerate(row): |
|---|
| 810 | e = total * rowPriors[ri] * colPriors[ci] |
|---|
| 811 | if not e: |
|---|
| 812 | return -1, -1, -1 |
|---|
| 813 | ss += (o - e) ** 2 / e |
|---|
| 814 | df = (dim - 1) ** 2 |
|---|
| 815 | return ss, df, statc.chisqprob(ss, df) |
|---|
| 816 | |
|---|
| 817 | class CMScore(list): |
|---|
| 818 | """ |
|---|
| 819 | :param test_results: :obj:`~Orange.evaluation.testing.ExperimentResults` |
|---|
| 820 | or list of :obj:`ConfusionMatrix`. |
|---|
| 821 | :rtype: list of scores, one for each learner.""" |
|---|
| 822 | def __new__(cls, test_results, **kwargs): |
|---|
| 823 | self = list.__new__(cls) |
|---|
| 824 | if isinstance(test_results, ConfusionMatrix): |
|---|
| 825 | self.__init__(test_results, **kwargs) |
|---|
| 826 | return self[0] |
|---|
| 827 | return self |
|---|
| 828 | |
|---|
| 829 | |
|---|
| 830 | @deprecated_keywords({"confm": "test_results"}) |
|---|
| 831 | def __init__(self, test_results=None): |
|---|
| 832 | super(CMScore, self).__init__() |
|---|
| 833 | |
|---|
| 834 | if test_results is not None: |
|---|
| 835 | self[:] = self.__call__(test_results) |
|---|
| 836 | |
|---|
| 837 | def __call__(self, test_results): |
|---|
| 838 | if isinstance(test_results, testing.ExperimentResults): |
|---|
| 839 | test_results = confusion_matrices(test_results, class_index=1) |
|---|
| 840 | if isinstance(test_results, ConfusionMatrix): |
|---|
| 841 | test_results = [test_results] |
|---|
| 842 | |
|---|
| 843 | return map(self.compute, test_results) |
|---|
| 844 | |
|---|
| 845 | |
|---|
| 846 | |
|---|
| 847 | class Sensitivity(CMScore): |
|---|
| 848 | __doc__ = """Compute `sensitivity |
|---|
| 849 | <http://en.wikipedia.org/wiki/Sensitivity_and_specificity>`_ (proportion |
|---|
| 850 | of actual positives which are correctly identified as such). |
|---|
| 851 | """ + CMScore.__doc__ |
|---|
| 852 | @classmethod |
|---|
| 853 | def compute(self, confusion_matrix): |
|---|
| 854 | tot = confusion_matrix.TP + confusion_matrix.FN |
|---|
| 855 | if tot < 1e-6: |
|---|
| 856 | import warnings |
|---|
| 857 | warnings.warn("Can't compute sensitivity: one or both classes have no instances") |
|---|
| 858 | return None |
|---|
| 859 | |
|---|
| 860 | return confusion_matrix.TP / tot |
|---|
| 861 | |
|---|
| 862 | |
|---|
| 863 | class Recall(Sensitivity): |
|---|
| 864 | __doc__ = """ Compute `recall |
|---|
| 865 | <http://en.wikipedia.org/wiki/Precision_and_recall>`_ |
|---|
| 866 | (fraction of relevant instances that are retrieved). |
|---|
| 867 | """ + CMScore.__doc__ |
|---|
| 868 | pass # Recall == Sensitivity |
|---|
| 869 | |
|---|
| 870 | |
|---|
| 871 | class Specificity(CMScore): |
|---|
| 872 | __doc__ = """Compute `specificity |
|---|
| 873 | <http://en.wikipedia.org/wiki/Sensitivity_and_specificity>`_ |
|---|
| 874 | (proportion of negatives which are correctly identified). |
|---|
| 875 | """ + CMScore.__doc__ |
|---|
| 876 | @classmethod |
|---|
| 877 | def compute(self, confusion_matrix): |
|---|
| 878 | tot = confusion_matrix.FP + confusion_matrix.TN |
|---|
| 879 | if tot < 1e-6: |
|---|
| 880 | import warnings |
|---|
| 881 | warnings.warn("Can't compute specificity: one or both classes have no instances") |
|---|
| 882 | return None |
|---|
| 883 | return confusion_matrix.TN / tot |
|---|
| 884 | |
|---|
| 885 | |
|---|
| 886 | class PPV(CMScore): |
|---|
| 887 | __doc__ = """Compute `positive predictive value |
|---|
| 888 | <http://en.wikipedia.org/wiki/Positive_predictive_value>`_ (proportion of |
|---|
| 889 | subjects with positive test results who are correctly diagnosed). |
|---|
| 890 | """ + CMScore.__doc__ |
|---|
| 891 | @classmethod |
|---|
| 892 | def compute(self, confusion_matrix): |
|---|
| 893 | tot = confusion_matrix.TP + confusion_matrix.FP |
|---|
| 894 | if tot < 1e-6: |
|---|
| 895 | import warnings |
|---|
| 896 | warnings.warn("Can't compute PPV: one or both classes have no instances") |
|---|
| 897 | return None |
|---|
| 898 | return confusion_matrix.TP / tot |
|---|
| 899 | |
|---|
| 900 | |
|---|
| 901 | class Precision(PPV): |
|---|
| 902 | __doc__ = """Compute `precision <http://en.wikipedia.org/wiki/Precision_and_recall>`_ |
|---|
| 903 | (retrieved instances that are relevant). |
|---|
| 904 | """ + CMScore.__doc__ |
|---|
| 905 | pass # Precision == PPV |
|---|
| 906 | |
|---|
| 907 | |
|---|
| 908 | class NPV(CMScore): |
|---|
| 909 | __doc__ = """Compute `negative predictive value |
|---|
| 910 | <http://en.wikipedia.org/wiki/Negative_predictive_value>`_ (proportion of |
|---|
| 911 | subjects with a negative test result who are correctly diagnosed). |
|---|
| 912 | """ + CMScore.__doc__ |
|---|
| 913 | @classmethod |
|---|
| 914 | def compute(self, confusion_matrix): |
|---|
| 915 | tot = confusion_matrix.FN + confusion_matrix.TN |
|---|
| 916 | if tot < 1e-6: |
|---|
| 917 | import warnings |
|---|
| 918 | warnings.warn("Can't compute NPV: one or both classes have no instances") |
|---|
| 919 | return None |
|---|
| 920 | return confusion_matrix.TN / tot |
|---|
| 921 | |
|---|
| 922 | |
|---|
| 923 | class F1(CMScore): |
|---|
| 924 | __doc__ = """Return `F1 score |
|---|
| 925 | <http://en.wikipedia.org/wiki/F1_score>`_ |
|---|
| 926 | (harmonic mean of precision and recall). |
|---|
| 927 | """ + CMScore.__doc__ |
|---|
| 928 | @classmethod |
|---|
| 929 | def compute(self, confusion_matrix): |
|---|
| 930 | p = Precision.compute(confusion_matrix) |
|---|
| 931 | r = Recall.compute(confusion_matrix) |
|---|
| 932 | if p is not None and r is not None and (p + r) != 0: |
|---|
| 933 | return 2. * p * r / (p + r) |
|---|
| 934 | else: |
|---|
| 935 | import warnings |
|---|
| 936 | warnings.warn("Can't compute F1: P + R is zero or not defined") |
|---|
| 937 | return None |
|---|
| 938 | |
|---|
| 939 | |
|---|
| 940 | class Falpha(CMScore): |
|---|
| 941 | __doc__ = """Compute the alpha-mean of precision and recall over the given confusion |
|---|
| 942 | matrix. |
|---|
| 943 | """ + CMScore.__doc__ |
|---|
| 944 | |
|---|
| 945 | def __init__(self, test_results, alpha=1.): |
|---|
| 946 | self.alpha = alpha |
|---|
| 947 | super(Falpha, self).__init__(test_results) |
|---|
| 948 | |
|---|
| 949 | def compute(self, confusion_matrix): |
|---|
| 950 | p = Precision.compute(confusion_matrix) |
|---|
| 951 | r = Recall.compute(confusion_matrix) |
|---|
| 952 | return (1. + self.alpha) * p * r / (self.alpha * p + r) |
|---|
| 953 | |
|---|
| 954 | |
|---|
| 955 | class MCC(CMScore): |
|---|
| 956 | __doc__ = """Compute `Matthew correlation coefficient |
|---|
| 957 | <http://en.wikipedia.org/wiki/Matthews_correlation_coefficient>`_ |
|---|
| 958 | (correlation coefficient between the observed and predicted binary |
|---|
| 959 | classifications). |
|---|
| 960 | """ + CMScore.__doc__ |
|---|
| 961 | @classmethod |
|---|
| 962 | def compute(self, cm): |
|---|
| 963 | # code by Boris Gorelik |
|---|
| 964 | TP, TN, FP, FN = cm.TP, cm.TN, cm.FP, cm.FN |
|---|
| 965 | |
|---|
| 966 | try: |
|---|
| 967 | return (TP * TN - FP * FN) / \ |
|---|
| 968 | math.sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)) |
|---|
| 969 | except ZeroDivisionError: |
|---|
| 970 | # Zero division occurs when there is either no true positives |
|---|
| 971 | # or no true negatives i.e. the problem contains only one |
|---|
| 972 | # type of classes. |
|---|
| 973 | import warnings |
|---|
| 974 | warnings.warn("Can't compute MCC: TP or TN is zero or not defined") |
|---|
| 975 | |
|---|
| 976 | |
|---|
| 977 | @deprecated_keywords({"bIsListOfMatrices": "b_is_list_of_matrices"}) |
|---|
| 978 | def scotts_pi(confusion_matrix, b_is_list_of_matrices=True): |
|---|
| 979 | """Compute Scott's Pi for measuring inter-rater agreement for nominal data |
|---|
| 980 | |
|---|
| 981 | http://en.wikipedia.org/wiki/Scott%27s_Pi |
|---|
| 982 | Scott's Pi is a statistic for measuring inter-rater reliability for nominal |
|---|
| 983 | raters. |
|---|
| 984 | |
|---|
| 985 | @param confusion_matrix: confusion matrix, or list of confusion matrices. |
|---|
| 986 | To obtain non-binary confusion matrix, call |
|---|
| 987 | Orange.evaluation.scoring.confusion_matrices |
|---|
| 988 | and set the class_index parameter to -2. |
|---|
| 989 | @param b_is_list_of_matrices: specifies whether confm is list of matrices. |
|---|
| 990 | This function needs to operate on non-binary |
|---|
| 991 | confusion matrices, which are represented by python |
|---|
| 992 | lists, therefore one needs a way to distinguish |
|---|
| 993 | between a single matrix and list of matrices |
|---|
| 994 | """ |
|---|
| 995 | |
|---|
| 996 | if b_is_list_of_matrices: |
|---|
| 997 | try: |
|---|
| 998 | return [scotts_pi(cm, b_is_list_of_matrices=False) |
|---|
| 999 | for cm in confusion_matrix] |
|---|
| 1000 | except TypeError: |
|---|
| 1001 | # Nevermind the parameter, maybe this is a "conventional" binary |
|---|
| 1002 | # confusion matrix and bIsListOfMatrices was specified by mistake |
|---|
| 1003 | return scottsPiSingle(confusion_matrix, bIsListOfMatrices=False) |
|---|
| 1004 | else: |
|---|
| 1005 | if isinstance(confusion_matrix, ConfusionMatrix): |
|---|
| 1006 | confusion_matrix = numpy.array([[confusion_matrix.TP, |
|---|
| 1007 | confusion_matrix.FN], [confusion_matrix.FP, |
|---|
| 1008 | confusion_matrix.TN]], dtype=float) |
|---|
| 1009 | else: |
|---|
| 1010 | confusion_matrix = numpy.array(confusion_matrix, dtype=float) |
|---|
| 1011 | |
|---|
| 1012 | marginalSumOfRows = numpy.sum(confusion_matrix, axis=0) |
|---|
| 1013 | marginalSumOfColumns = numpy.sum(confusion_matrix, axis=1) |
|---|
| 1014 | jointProportion = (marginalSumOfColumns + marginalSumOfRows) / \ |
|---|
| 1015 | (2. * numpy.sum(confusion_matrix)) |
|---|
| 1016 | # In the eq. above, 2. is what the Wikipedia page calls |
|---|
| 1017 | # the number of annotators. Here we have two annotators: |
|---|
| 1018 | # the observed (true) labels (annotations) and the predicted by |
|---|
| 1019 | # the learners. |
|---|
| 1020 | |
|---|
| 1021 | prExpected = numpy.sum(jointProportion ** 2) |
|---|
| 1022 | prActual = numpy.sum(numpy.diag(confusion_matrix)) / \ |
|---|
| 1023 | numpy.sum(confusion_matrix) |
|---|
| 1024 | |
|---|
| 1025 | ret = (prActual - prExpected) / (1.0 - prExpected) |
|---|
| 1026 | return ret |
|---|
| 1027 | |
|---|
| 1028 | # Backward compatibility |
|---|
| 1029 | sens = Sensitivity |
|---|
| 1030 | spec = Specificity |
|---|
| 1031 | precision = Precision |
|---|
| 1032 | recall = Recall |
|---|
| 1033 | |
|---|
| 1034 | |
|---|
| 1035 | |
|---|
| 1036 | @deprecated_keywords({"classIndex": "class_index", |
|---|
| 1037 | "unweighted": "ignore_weights"}) |
|---|
| 1038 | @replace_discrete_probabilities_with_list(False) |
|---|
| 1039 | def AUCWilcoxon(res, class_index= -1, ignore_weights=False, **argkw): |
|---|
| 1040 | """Compute the area under ROC (AUC) and its standard error using |
|---|
| 1041 | Wilcoxon's approach proposed by Hanley and McNeal (1982). If |
|---|
| 1042 | :obj:`class_index` is not specified, the first class is used as |
|---|
| 1043 | "the positive" and others are negative. The result is a list of |
|---|
| 1044 | tuples (aROC, standard error). |
|---|
| 1045 | |
|---|
| 1046 | If test results consist of multiple folds, you need to split them using |
|---|
| 1047 | :obj:`split_by_iterations` and perform this test on each fold separately. |
|---|
| 1048 | """ |
|---|
| 1049 | useweights = res.weights and not ignore_weights |
|---|
| 1050 | problists, tots = corn.computeROCCumulative(res, class_index, useweights) |
|---|
| 1051 | |
|---|
| 1052 | results = [] |
|---|
| 1053 | |
|---|
| 1054 | totPos, totNeg = tots[1], tots[0] |
|---|
| 1055 | N = totPos + totNeg |
|---|
| 1056 | for plist in problists: |
|---|
| 1057 | highPos, lowNeg = totPos, 0. |
|---|
| 1058 | W, Q1, Q2 = 0., 0., 0. |
|---|
| 1059 | for prob in plist: |
|---|
| 1060 | thisPos, thisNeg = prob[1][1], prob[1][0] |
|---|
| 1061 | highPos -= thisPos |
|---|
| 1062 | W += thisNeg * (highPos + thisPos / 2.) |
|---|
| 1063 | Q2 += thisPos * (lowNeg ** 2 + lowNeg * thisNeg + thisNeg ** 2 / 3.) |
|---|
| 1064 | Q1 += thisNeg * (highPos ** 2 + highPos * thisPos + thisPos ** 2 / 3.) |
|---|
| 1065 | |
|---|
| 1066 | lowNeg += thisNeg |
|---|
| 1067 | |
|---|
| 1068 | W /= (totPos * totNeg) |
|---|
| 1069 | Q1 /= (totNeg * totPos ** 2) |
|---|
| 1070 | Q2 /= (totPos * totNeg ** 2) |
|---|
| 1071 | |
|---|
| 1072 | SE = math.sqrt((W * (1 - W) + (totPos - 1) * (Q1 - W ** 2) + |
|---|
| 1073 | (totNeg - 1) * (Q2 - W ** 2)) / (totPos * totNeg)) |
|---|
| 1074 | results.append((W, SE)) |
|---|
| 1075 | return results |
|---|
| 1076 | |
|---|
| 1077 | AROC = AUCWilcoxon # for backward compatibility, AROC is obsolete |
|---|
| 1078 | |
|---|
| 1079 | |
|---|
| 1080 | @deprecated_keywords({"classIndex": "class_index", |
|---|
| 1081 | "unweighted": "ignore_weights"}) |
|---|
| 1082 | @replace_discrete_probabilities_with_list(False) |
|---|
| 1083 | def compare_2_AUCs(res, lrn1, lrn2, class_index= -1, |
|---|
| 1084 | ignore_weights=False, **argkw): |
|---|
| 1085 | return corn.compare2ROCs(res, lrn1, lrn2, class_index, |
|---|
| 1086 | res.weights and not ignore_weights) |
|---|
| 1087 | |
|---|
| 1088 | # for backward compatibility, compare_2_AROCs is obsolete |
|---|
| 1089 | compare_2_AROCs = compare_2_AUCs |
|---|
| 1090 | |
|---|
| 1091 | |
|---|
| 1092 | @deprecated_keywords({"classIndex": "class_index"}) |
|---|
| 1093 | @replace_discrete_probabilities_with_list(False) |
|---|
| 1094 | def compute_ROC(res, class_index= -1): |
|---|
| 1095 | """Compute a ROC curve as a list of (x, y) tuples, where x is |
|---|
| 1096 | 1-specificity and y is sensitivity. |
|---|
| 1097 | """ |
|---|
| 1098 | problists, tots = corn.computeROCCumulative(res, class_index) |
|---|
| 1099 | |
|---|
| 1100 | results = [] |
|---|
| 1101 | totPos, totNeg = tots[1], tots[0] |
|---|
| 1102 | |
|---|
| 1103 | for plist in problists: |
|---|
| 1104 | curve = [(1., 1.)] |
|---|
| 1105 | TP, TN = totPos, 0. |
|---|
| 1106 | FN, FP = 0., totNeg |
|---|
| 1107 | for prob in plist: |
|---|
| 1108 | thisPos, thisNeg = prob[1][1], prob[1][0] |
|---|
| 1109 | # thisPos go from TP to FN |
|---|
| 1110 | TP -= thisPos |
|---|
| 1111 | FN += thisPos |
|---|
| 1112 | # thisNeg go from FP to TN |
|---|
| 1113 | TN += thisNeg |
|---|
| 1114 | FP -= thisNeg |
|---|
| 1115 | |
|---|
| 1116 | sens = TP / (TP + FN) |
|---|
| 1117 | spec = TN / (FP + TN) |
|---|
| 1118 | curve.append((1 - spec, sens)) |
|---|
| 1119 | results.append(curve) |
|---|
| 1120 | |
|---|
| 1121 | return results |
|---|
| 1122 | |
|---|
| 1123 | ## TC's implementation of algorithms, taken from: |
|---|
| 1124 | ## T Fawcett: ROC Graphs: Notes and Practical Considerations for |
|---|
| 1125 | ## Data Mining Researchers, submitted to KDD Journal. |
|---|
| 1126 | def ROC_slope((P1x, P1y, P1fscore), (P2x, P2y, P2fscore)): |
|---|
| 1127 | if P1x == P2x: |
|---|
| 1128 | return 1e300 |
|---|
| 1129 | return (P1y - P2y) / (P1x - P2x) |
|---|
| 1130 | |
|---|
| 1131 | |
|---|
| 1132 | @deprecated_keywords({"keepConcavities": "keep_concavities"}) |
|---|
| 1133 | def ROC_add_point(P, R, keep_concavities=1): |
|---|
| 1134 | if keep_concavities: |
|---|
| 1135 | R.append(P) |
|---|
| 1136 | else: |
|---|
| 1137 | while True: |
|---|
| 1138 | if len(R) < 2: |
|---|
| 1139 | R.append(P) |
|---|
| 1140 | return R |
|---|
| 1141 | else: |
|---|
| 1142 | T = R.pop() |
|---|
| 1143 | T2 = R[-1] |
|---|
| 1144 | if ROC_slope(T2, T) > ROC_slope(T, P): |
|---|
| 1145 | R.append(T) |
|---|
| 1146 | R.append(P) |
|---|
| 1147 | return R |
|---|
| 1148 | return R |
|---|
| 1149 | |
|---|
| 1150 | |
|---|
| 1151 | @deprecated_keywords({"classIndex": "class_index", |
|---|
| 1152 | "keepConcavities": "keep_concavities"}) |
|---|
| 1153 | @replace_discrete_probabilities_with_list(False) |
|---|
| 1154 | def TC_compute_ROC(res, class_index= -1, keep_concavities=1): |
|---|
| 1155 | problists, tots = corn.computeROCCumulative(res, class_index) |
|---|
| 1156 | |
|---|
| 1157 | results = [] |
|---|
| 1158 | P, N = tots[1], tots[0] |
|---|
| 1159 | |
|---|
| 1160 | for plist in problists: |
|---|
| 1161 | ## corn gives an increasing by scores list, we need a decreasing |
|---|
| 1162 | plist.reverse() |
|---|
| 1163 | TP = 0. |
|---|
| 1164 | FP = 0. |
|---|
| 1165 | curve = [] |
|---|
| 1166 | fPrev = 10e300 # "infinity" score at 0., 0. |
|---|
| 1167 | for prob in plist: |
|---|
| 1168 | f = prob[0] |
|---|
| 1169 | if f != fPrev: |
|---|
| 1170 | if P: |
|---|
| 1171 | tpr = TP / P |
|---|
| 1172 | else: |
|---|
| 1173 | tpr = 0. |
|---|
| 1174 | if N: |
|---|
| 1175 | fpr = FP / N |
|---|
| 1176 | else: |
|---|
| 1177 | fpr = 0. |
|---|
| 1178 | curve = ROC_add_point((fpr, tpr, fPrev), curve, |
|---|
| 1179 | keep_concavities) |
|---|
| 1180 | fPrev = f |
|---|
| 1181 | thisPos, thisNeg = prob[1][1], prob[1][0] |
|---|
| 1182 | TP += thisPos |
|---|
| 1183 | FP += thisNeg |
|---|
| 1184 | if P: |
|---|
| 1185 | tpr = TP / P |
|---|
| 1186 | else: |
|---|
| 1187 | tpr = 0. |
|---|
| 1188 | if N: |
|---|
| 1189 | fpr = FP / N |
|---|
| 1190 | else: |
|---|
| 1191 | fpr = 0. |
|---|
| 1192 | curve = ROC_add_point((fpr, tpr, f), curve, keep_concavities) ## ugly |
|---|
| 1193 | results.append(curve) |
|---|
| 1194 | |
|---|
| 1195 | return results |
|---|
| 1196 | |
|---|
| 1197 | ## returns a list of points at the intersection of the tangential |
|---|
| 1198 | ## iso-performance line and the given ROC curve |
|---|
| 1199 | ## for given values of FPcost, FNcost and pval |
|---|
| 1200 | def TC_best_thresholds_on_ROC_curve(FPcost, FNcost, pval, curve): |
|---|
| 1201 | m = (FPcost * (1. - pval)) / (FNcost * pval) |
|---|
| 1202 | |
|---|
| 1203 | ## put the iso-performance line in point (0., 1.) |
|---|
| 1204 | x0, y0 = (0., 1.) |
|---|
| 1205 | x1, y1 = (1., 1. + m) |
|---|
| 1206 | d01 = math.sqrt((x1 - x0) * (x1 - x0) + (y1 - y0) * (y1 - y0)) |
|---|
| 1207 | |
|---|
| 1208 | ## calculate and find the closest point to the line |
|---|
| 1209 | firstp = 1 |
|---|
| 1210 | mind = 0. |
|---|
| 1211 | a = x0 * y1 - x1 * y0 |
|---|
| 1212 | closestPoints = [] |
|---|
| 1213 | for (x, y, fscore) in curve: |
|---|
| 1214 | d = ((y0 - y1) * x + (x1 - x0) * y + a) / d01 |
|---|
| 1215 | d = abs(d) |
|---|
| 1216 | if firstp or d < mind: |
|---|
| 1217 | mind, firstp = d, 0 |
|---|
| 1218 | closestPoints = [(x, y, fscore)] |
|---|
| 1219 | else: |
|---|
| 1220 | if abs(d - mind) <= 0.0001: ## close enough |
|---|
| 1221 | closestPoints.append((x, y, fscore)) |
|---|
| 1222 | return closestPoints |
|---|
| 1223 | |
|---|
| 1224 | def frange(start, end=None, inc=None): |
|---|
| 1225 | """A range function, that does accept float increments...""" |
|---|
| 1226 | |
|---|
| 1227 | if end is None: |
|---|
| 1228 | end = start + 0. |
|---|
| 1229 | start = 0. |
|---|
| 1230 | |
|---|
| 1231 | if inc is None or inc == 0: |
|---|
| 1232 | inc = 1. |
|---|
| 1233 | |
|---|
| 1234 | L = [start] |
|---|
| 1235 | while 1: |
|---|
| 1236 | next = start + len(L) * inc |
|---|
| 1237 | if inc > 0 and next >= end: |
|---|
| 1238 | L.append(end) |
|---|
| 1239 | break |
|---|
| 1240 | elif inc < 0 and next <= end: |
|---|
| 1241 | L.append(end) |
|---|
| 1242 | break |
|---|
| 1243 | L.append(next) |
|---|
| 1244 | |
|---|
| 1245 | return L |
|---|
| 1246 | |
|---|
| 1247 | ## input ROCcurves are of form [ROCcurves1, ROCcurves2, ... ROCcurvesN], |
|---|
| 1248 | ## where ROCcurvesX is a set of ROC curves, |
|---|
| 1249 | ## where a (one) ROC curve is a set of (FP, TP) points |
|---|
| 1250 | ## |
|---|
| 1251 | ## for each (sub)set of input ROC curves |
|---|
| 1252 | ## returns the average ROC curve and an array of (vertical) standard deviations |
|---|
| 1253 | @deprecated_keywords({"ROCcurves": "roc_curves"}) |
|---|
| 1254 | def TC_vertical_average_ROC(roc_curves, samples=10): |
|---|
| 1255 | def INTERPOLATE((P1x, P1y, P1fscore), (P2x, P2y, P2fscore), X): |
|---|
| 1256 | if (P1x == P2x) or P1x < X > P2x or P1x > X < P2x: |
|---|
| 1257 | raise ValueError, "assumptions for interpolation are not met: P1 = %f,%f P2 = %f,%f X = %f" % (P1x, P1y, P2x, P2y, X) |
|---|
| 1258 | dx = float(P2x) - float(P1x) |
|---|
| 1259 | dy = float(P2y) - float(P1y) |
|---|
| 1260 | m = dy / dx |
|---|
| 1261 | return P1y + m * (X - P1x) |
|---|
| 1262 | |
|---|
| 1263 | def TP_FOR_FP(FPsample, ROC, npts): |
|---|
| 1264 | i = 0 |
|---|
| 1265 | while i < npts - 1: |
|---|
| 1266 | (fp, _, _) = ROC[i + 1] |
|---|
| 1267 | if fp <= FPsample: |
|---|
| 1268 | i += 1 |
|---|
| 1269 | else: |
|---|
| 1270 | break |
|---|
| 1271 | (fp, tp, _) = ROC[i] |
|---|
| 1272 | if fp == FPsample: |
|---|
| 1273 | return tp |
|---|
| 1274 | elif fp < FPsample and i + 1 < len(ROC): |
|---|
| 1275 | return INTERPOLATE(ROC[i], ROC[i + 1], FPsample) |
|---|
| 1276 | elif fp < FPsample and i + 1 == len(ROC): # return the last |
|---|
| 1277 | return ROC[i][1] |
|---|
| 1278 | raise ValueError, "cannot compute: TP_FOR_FP in TC_vertical_average_ROC" |
|---|
| 1279 | #return 0. |
|---|
| 1280 | |
|---|
| 1281 | average = [] |
|---|
| 1282 | stdev = [] |
|---|
| 1283 | for ROCS in roc_curves: |
|---|
| 1284 | npts = [] |
|---|
| 1285 | for c in ROCS: |
|---|
| 1286 | npts.append(len(c)) |
|---|
| 1287 | nrocs = len(ROCS) |
|---|
| 1288 | |
|---|
| 1289 | TPavg = [] |
|---|
| 1290 | TPstd = [] |
|---|
| 1291 | for FPsample in frange(0., 1., 1. / samples): |
|---|
| 1292 | TPsum = [] |
|---|
| 1293 | for i in range(nrocs): |
|---|
| 1294 | ##TPsum = TPsum + TP_FOR_FP(FPsample, ROCS[i], npts[i]) |
|---|
| 1295 | TPsum.append(TP_FOR_FP(FPsample, ROCS[i], npts[i])) |
|---|
| 1296 | TPavg.append((FPsample, statc.mean(TPsum))) |
|---|
| 1297 | if len(TPsum) > 1: |
|---|
| 1298 | stdv = statc.std(TPsum) |
|---|
| 1299 | else: |
|---|
| 1300 | stdv = 0. |
|---|
| 1301 | TPstd.append(stdv) |
|---|
| 1302 | |
|---|
| 1303 | average.append(TPavg) |
|---|
| 1304 | stdev.append(TPstd) |
|---|
| 1305 | |
|---|
| 1306 | return average, stdev |
|---|
| 1307 | |
|---|
| 1308 | ## input ROCcurves are of form [ROCcurves1, ROCcurves2, ... ROCcurvesN], |
|---|
| 1309 | ## where ROCcurvesX is a set of ROC curves, |
|---|
| 1310 | ## where a (one) ROC curve is a set of (FP, TP) points |
|---|
| 1311 | ## |
|---|
| 1312 | ## for each (sub)set of input ROC curves |
|---|
| 1313 | ## returns the average ROC curve, an array of vertical standard deviations and an array of horizontal standard deviations |
|---|
| 1314 | @deprecated_keywords({"ROCcurves": "roc_curves"}) |
|---|
| 1315 | def TC_threshold_average_ROC(roc_curves, samples=10): |
|---|
| 1316 | def POINT_AT_THRESH(ROC, npts, thresh): |
|---|
| 1317 | i = 0 |
|---|
| 1318 | while i < npts - 1: |
|---|
| 1319 | (px, py, pfscore) = ROC[i] |
|---|
| 1320 | if pfscore > thresh: |
|---|
| 1321 | i += 1 |
|---|
| 1322 | else: |
|---|
| 1323 | break |
|---|
| 1324 | return ROC[i] |
|---|
| 1325 | |
|---|
| 1326 | average = [] |
|---|
| 1327 | stdevV = [] |
|---|
| 1328 | stdevH = [] |
|---|
| 1329 | for ROCS in roc_curves: |
|---|
| 1330 | npts = [] |
|---|
| 1331 | for c in ROCS: |
|---|
| 1332 | npts.append(len(c)) |
|---|
| 1333 | nrocs = len(ROCS) |
|---|
| 1334 | |
|---|
| 1335 | T = [] |
|---|
| 1336 | for c in ROCS: |
|---|
| 1337 | for (px, py, pfscore) in c: |
|---|
| 1338 | ## try: |
|---|
| 1339 | ## T.index(pfscore) |
|---|
| 1340 | ## except: |
|---|
| 1341 | T.append(pfscore) |
|---|
| 1342 | T.sort() |
|---|
| 1343 | T.reverse() ## ugly |
|---|
| 1344 | |
|---|
| 1345 | TPavg = [] |
|---|
| 1346 | TPstdV = [] |
|---|
| 1347 | TPstdH = [] |
|---|
| 1348 | for tidx in frange(0, (len(T) - 1.), float(len(T)) / samples): |
|---|
| 1349 | FPsum = [] |
|---|
| 1350 | TPsum = [] |
|---|
| 1351 | for i in range(nrocs): |
|---|
| 1352 | (fp, tp, _) = POINT_AT_THRESH(ROCS[i], npts[i], T[int(tidx)]) |
|---|
| 1353 | FPsum.append(fp) |
|---|
| 1354 | TPsum.append(tp) |
|---|
| 1355 | TPavg.append((statc.mean(FPsum), statc.mean(TPsum))) |
|---|
| 1356 | ## vertical standard deviation |
|---|
| 1357 | if len(TPsum) > 1: |
|---|
| 1358 | stdv = statc.std(TPsum) |
|---|
| 1359 | else: |
|---|
| 1360 | stdv = 0. |
|---|
| 1361 | TPstdV.append(stdv) |
|---|
| 1362 | ## horizontal standard deviation |
|---|
| 1363 | if len(FPsum) > 1: |
|---|
| 1364 | stdh = statc.std(FPsum) |
|---|
| 1365 | else: |
|---|
| 1366 | stdh = 0. |
|---|
| 1367 | TPstdH.append(stdh) |
|---|
| 1368 | |
|---|
| 1369 | average.append(TPavg) |
|---|
| 1370 | stdevV.append(TPstdV) |
|---|
| 1371 | stdevH.append(TPstdH) |
|---|
| 1372 | |
|---|
| 1373 | return average, stdevV, stdevH |
|---|
| 1374 | |
|---|
| 1375 | ## Calibration Curve |
|---|
| 1376 | ## returns an array of (curve, yesClassPredictions, noClassPredictions) |
|---|
| 1377 | ## elements, where: |
|---|
| 1378 | ## - curve is an array of points (x, y) on the calibration curve |
|---|
| 1379 | ## - yesClassRugPoints is an array of (x, 1) points |
|---|
| 1380 | ## - noClassRugPoints is an array of (x, 0) points |
|---|
| 1381 | @deprecated_keywords({"classIndex": "class_index"}) |
|---|
| 1382 | @replace_discrete_probabilities_with_list(False) |
|---|
| 1383 | def compute_calibration_curve(res, class_index= -1): |
|---|
| 1384 | ## merge multiple iterations into one |
|---|
| 1385 | mres = Orange.evaluation.testing.ExperimentResults(1, res.classifier_names, |
|---|
| 1386 | res.class_values, res.weights, classifiers=res.classifiers, |
|---|
| 1387 | loaded=res.loaded, test_type=res.test_type, labels=res.labels) |
|---|
| 1388 | for te in res.results: |
|---|
| 1389 | mres.results.append(te) |
|---|
| 1390 | |
|---|
| 1391 | problists, tots = corn.computeROCCumulative(mres, class_index) |
|---|
| 1392 | |
|---|
| 1393 | results = [] |
|---|
| 1394 | |
|---|
| 1395 | bins = 10 ## divide interval between 0. and 1. into N bins |
|---|
| 1396 | |
|---|
| 1397 | for plist in problists: |
|---|
| 1398 | yesClassRugPoints = [] |
|---|
| 1399 | noClassRugPoints = [] |
|---|
| 1400 | |
|---|
| 1401 | yesBinsVals = [0] * bins |
|---|
| 1402 | noBinsVals = [0] * bins |
|---|
| 1403 | for (f, (thisNeg, thisPos)) in plist: |
|---|
| 1404 | yesClassRugPoints.append((f, thisPos)) # 1. |
|---|
| 1405 | noClassRugPoints.append((f, thisNeg)) # 1. |
|---|
| 1406 | |
|---|
| 1407 | index = int(f * bins) |
|---|
| 1408 | index = min(index, bins - 1) ## just in case for value 1. |
|---|
| 1409 | yesBinsVals[index] += thisPos |
|---|
| 1410 | noBinsVals[index] += thisNeg |
|---|
| 1411 | |
|---|
| 1412 | curve = [] |
|---|
| 1413 | for cn in range(bins): |
|---|
| 1414 | f = float(cn * 1. / bins) + (1. / 2. / bins) |
|---|
| 1415 | yesVal = yesBinsVals[cn] |
|---|
| 1416 | noVal = noBinsVals[cn] |
|---|
| 1417 | allVal = yesVal + noVal |
|---|
| 1418 | if allVal == 0.: continue |
|---|
| 1419 | y = float(yesVal) / float(allVal) |
|---|
| 1420 | curve.append((f, y)) |
|---|
| 1421 | |
|---|
| 1422 | ## smooth the curve |
|---|
| 1423 | maxnPoints = 100 |
|---|
| 1424 | if len(curve) >= 3: |
|---|
| 1425 | # loessCurve = statc.loess(curve, -3, 0.6) |
|---|
| 1426 | loessCurve = statc.loess(curve, maxnPoints, 0.5, 3) |
|---|
| 1427 | else: |
|---|
| 1428 | loessCurve = curve |
|---|
| 1429 | clen = len(loessCurve) |
|---|
| 1430 | if clen > maxnPoints: |
|---|
| 1431 | df = clen / maxnPoints |
|---|
| 1432 | if df < 1: df = 1 |
|---|
| 1433 | curve = [loessCurve[i] for i in range(0, clen, df)] |
|---|
| 1434 | else: |
|---|
| 1435 | curve = loessCurve |
|---|
| 1436 | ## remove the third value (variance of epsilon?) that suddenly |
|---|
| 1437 | ## appeared in the output of the statc.loess function |
|---|
| 1438 | curve = [c[:2] for c in curve] |
|---|
| 1439 | results.append((curve, yesClassRugPoints, noClassRugPoints)) |
|---|
| 1440 | |
|---|
| 1441 | return results |
|---|
| 1442 | |
|---|
| 1443 | |
|---|
| 1444 | ## Lift Curve |
|---|
| 1445 | ## returns an array of curve elements, where: |
|---|
| 1446 | ## - curve is an array of points ((TP + FP) / (P + N), TP / P, (th, FP / N)) |
|---|
| 1447 | ## on the Lift Curve |
|---|
| 1448 | @deprecated_keywords({"classIndex": "class_index"}) |
|---|
| 1449 | @replace_discrete_probabilities_with_list(False) |
|---|
| 1450 | def compute_lift_curve(res, class_index= -1): |
|---|
| 1451 | ## merge multiple iterations into one |
|---|
| 1452 | mres = Orange.evaluation.testing.ExperimentResults(1, res.classifier_names, |
|---|
| 1453 | res.class_values, res.weights, classifiers=res.classifiers, |
|---|
| 1454 | loaded=res.loaded, test_type=res.test_type, labels=res.labels) |
|---|
| 1455 | for te in res.results: |
|---|
| 1456 | mres.results.append(te) |
|---|
| 1457 | |
|---|
| 1458 | problists, tots = corn.computeROCCumulative(mres, class_index) |
|---|
| 1459 | |
|---|
| 1460 | results = [] |
|---|
| 1461 | P, N = tots[1], tots[0] |
|---|
| 1462 | for plist in problists: |
|---|
| 1463 | ## corn gives an increasing by scores list, we need a decreasing |
|---|
| 1464 | plist.reverse() |
|---|
| 1465 | TP = 0. |
|---|
| 1466 | FP = 0. |
|---|
| 1467 | curve = [(0., 0., (10e300, 0.))] |
|---|
| 1468 | for (f, (thisNeg, thisPos)) in plist: |
|---|
| 1469 | TP += thisPos |
|---|
| 1470 | FP += thisNeg |
|---|
| 1471 | curve.append(((TP + FP) / (P + N), TP, (f, FP / (N or 1)))) |
|---|
| 1472 | results.append(curve) |
|---|
| 1473 | |
|---|
| 1474 | return P, N, results |
|---|
| 1475 | |
|---|
| 1476 | |
|---|
| 1477 | class CDT: |
|---|
| 1478 | """Store the number of concordant (C), discordant (D) and tied (T) pairs.""" |
|---|
| 1479 | def __init__(self, C=0., D=0., T=0.): |
|---|
| 1480 | self.C, self.D, self.T = C, D, T |
|---|
| 1481 | |
|---|
| 1482 | def is_CDT_empty(cdt): |
|---|
| 1483 | return cdt.C + cdt.D + cdt.T < 1e-20 |
|---|
| 1484 | |
|---|
| 1485 | |
|---|
| 1486 | @deprecated_keywords({"classIndex": "class_index", |
|---|
| 1487 | "unweighted": "ignore_weights"}) |
|---|
| 1488 | @replace_discrete_probabilities_with_list(False) |
|---|
| 1489 | def compute_CDT(res, class_index= -1, ignore_weights=False, **argkw): |
|---|
| 1490 | """Obsolete, don't use.""" |
|---|
| 1491 | if class_index < 0: |
|---|
| 1492 | if res.base_class >= 0: |
|---|
| 1493 | class_index = res.base_class |
|---|
| 1494 | else: |
|---|
| 1495 | class_index = 1 |
|---|
| 1496 | |
|---|
| 1497 | useweights = res.weights and not ignore_weights |
|---|
| 1498 | weightByClasses = argkw.get("weightByClasses", True) |
|---|
| 1499 | |
|---|
| 1500 | if res.number_of_iterations > 1: |
|---|
| 1501 | CDTs = [CDT() for _ in range(res.number_of_learners)] |
|---|
| 1502 | iterationExperiments = split_by_iterations(res) |
|---|
| 1503 | for exp in iterationExperiments: |
|---|
| 1504 | expCDTs = corn.computeCDT(exp, class_index, useweights) |
|---|
| 1505 | for i in range(len(CDTs)): |
|---|
| 1506 | CDTs[i].C += expCDTs[i].C |
|---|
| 1507 | CDTs[i].D += expCDTs[i].D |
|---|
| 1508 | CDTs[i].T += expCDTs[i].T |
|---|
| 1509 | for i in range(res.number_of_learners): |
|---|
| 1510 | if is_CDT_empty(CDTs[0]): |
|---|
| 1511 | return corn.computeCDT(res, class_index, useweights) |
|---|
| 1512 | |
|---|
| 1513 | return CDTs |
|---|
| 1514 | else: |
|---|
| 1515 | return corn.computeCDT(res, class_index, useweights) |
|---|
| 1516 | |
|---|
| 1517 | class AUC(list): |
|---|
| 1518 | """ |
|---|
| 1519 | Compute the area under ROC curve given a set of experimental results. |
|---|
| 1520 | If testing consisted of multiple folds, each fold is scored and the |
|---|
| 1521 | average score is returned. If a fold contains only instances with the |
|---|
| 1522 | same class value, folds will be merged. |
|---|
| 1523 | |
|---|
| 1524 | :param test_results: test results to score |
|---|
| 1525 | :param ignore_weights: ignore instance weights when calculating score |
|---|
| 1526 | :param multiclass: tells what kind of averaging to perform if the target |
|---|
| 1527 | class has more than 2 values. |
|---|
| 1528 | """ |
|---|
| 1529 | |
|---|
| 1530 | #!Compute AUC for each pair of classes (ignoring instances of all other |
|---|
| 1531 | #!classes) and average the results, weighting them by the number of |
|---|
| 1532 | #!pairs of instances from these two classes (e.g. by the product of |
|---|
| 1533 | #!probabilities of the two classes). AUC computed in this way still |
|---|
| 1534 | #!behaves as the concordance index, e.g., gives the probability that two |
|---|
| 1535 | #!randomly chosen instances from different classes will be correctly |
|---|
| 1536 | #!recognized (if the classifier knows from which two classes the |
|---|
| 1537 | #!instances came). |
|---|
| 1538 | ByWeightedPairs = 0 |
|---|
| 1539 | |
|---|
| 1540 | #!Similar to ByWeightedPairs, except that the average over class pairs |
|---|
| 1541 | #!is not weighted. This AUC is, like the binary version, independent of |
|---|
| 1542 | #!class distributions, but it is not related to the concordance index |
|---|
| 1543 | #!any more. |
|---|
| 1544 | ByPairs = 1 |
|---|
| 1545 | |
|---|
| 1546 | #!For each class, it computes AUC for this class against all others (that |
|---|
| 1547 | #!is, treating other classes as one class). The AUCs are then averaged by |
|---|
| 1548 | #!the class probabilities. This is related to the concordance index in |
|---|
| 1549 | #!which we test the classifier's (average) capability of distinguishing |
|---|
| 1550 | #!the instances from a specified class from those that come from other |
|---|
| 1551 | #!classes. |
|---|
| 1552 | #!Unlike the binary AUC, the measure is not independent of class |
|---|
| 1553 | #!distributions. |
|---|
| 1554 | WeightedOneAgainstAll = 2 |
|---|
| 1555 | |
|---|
| 1556 | #!Similar to weighted_one_against_all, except that the average |
|---|
| 1557 | #!is not weighted. |
|---|
| 1558 | OneAgainstAll = 3 |
|---|
| 1559 | |
|---|
| 1560 | @replace_use_weights |
|---|
| 1561 | @deprecated_keywords({"method": "multiclass"}) |
|---|
| 1562 | def __init__(self, test_results=None, multiclass=ByWeightedPairs, ignore_weights=False): |
|---|
| 1563 | |
|---|
| 1564 | super(AUC, self).__init__() |
|---|
| 1565 | |
|---|
| 1566 | self.ignore_weights = ignore_weights |
|---|
| 1567 | self.method = multiclass |
|---|
| 1568 | |
|---|
| 1569 | if test_results is not None: |
|---|
| 1570 | r = self.__call__(test_results) |
|---|
| 1571 | if r == False: #when the test is invalid it can return a single False |
|---|
| 1572 | r = [ False ] * test_results.number_of_learners |
|---|
| 1573 | self[:] = r |
|---|
| 1574 | |
|---|
| 1575 | @replace_discrete_probabilities_with_list(method=True) |
|---|
| 1576 | def __call__(self, test_results): |
|---|
| 1577 | if len(test_results.class_values) < 2: |
|---|
| 1578 | raise ValueError("Cannot compute AUC on a single-class problem") |
|---|
| 1579 | elif len(test_results.class_values) == 2: |
|---|
| 1580 | return self._compute_for_binary_class(test_results) |
|---|
| 1581 | else: |
|---|
| 1582 | return self._compute_for_multi_value_class(test_results, self.method) |
|---|
| 1583 | |
|---|
| 1584 | def _compute_for_binary_class(self, res): |
|---|
| 1585 | """AUC for binary classification problems""" |
|---|
| 1586 | if res.number_of_iterations > 1: |
|---|
| 1587 | return self._compute_for_multiple_folds( |
|---|
| 1588 | self._compute_one_class_against_all, |
|---|
| 1589 | split_by_iterations(res), |
|---|
| 1590 | (-1, res, res.number_of_iterations)) |
|---|
| 1591 | else: |
|---|
| 1592 | return self._compute_one_class_against_all(res, -1)[0] |
|---|
| 1593 | |
|---|
| 1594 | def _compute_for_multi_value_class(self, res, method=0): |
|---|
| 1595 | """AUC for multiclass classification problems""" |
|---|
| 1596 | numberOfClasses = len(res.class_values) |
|---|
| 1597 | |
|---|
| 1598 | if res.number_of_iterations > 1: |
|---|
| 1599 | iterations = split_by_iterations(res) |
|---|
| 1600 | all_ite = res |
|---|
| 1601 | else: |
|---|
| 1602 | iterations = [res] |
|---|
| 1603 | all_ite = None |
|---|
| 1604 | |
|---|
| 1605 | # by pairs |
|---|
| 1606 | sum_aucs = [0.] * res.number_of_learners |
|---|
| 1607 | usefulClassPairs = 0. |
|---|
| 1608 | |
|---|
| 1609 | prob = None |
|---|
| 1610 | if method in [self.ByWeightedPairs, self.WeightedOneAgainstAll]: |
|---|
| 1611 | prob = class_probabilities_from_res(res) |
|---|
| 1612 | |
|---|
| 1613 | if method in [self.ByWeightedPairs, self.ByPairs]: |
|---|
| 1614 | for classIndex1 in range(numberOfClasses): |
|---|
| 1615 | for classIndex2 in range(classIndex1): |
|---|
| 1616 | subsum_aucs = self._compute_for_multiple_folds( |
|---|
| 1617 | self._compute_one_class_against_another, iterations, |
|---|
| 1618 | (classIndex1, classIndex2, all_ite, |
|---|
| 1619 | res.number_of_iterations)) |
|---|
| 1620 | if subsum_aucs: |
|---|
| 1621 | if method == self.ByWeightedPairs: |
|---|
| 1622 | p_ij = prob[classIndex1] * prob[classIndex2] |
|---|
| 1623 | subsum_aucs = [x * p_ij for x in subsum_aucs] |
|---|
| 1624 | usefulClassPairs += p_ij |
|---|
| 1625 | else: |
|---|
| 1626 | usefulClassPairs += 1 |
|---|
| 1627 | sum_aucs = map(add, sum_aucs, subsum_aucs) |
|---|
| 1628 | else: |
|---|
| 1629 | for classIndex in range(numberOfClasses): |
|---|
| 1630 | subsum_aucs = self._compute_for_multiple_folds( |
|---|
| 1631 | self._compute_one_class_against_all, |
|---|
| 1632 | iterations, (classIndex, all_ite, |
|---|
| 1633 | res.number_of_iterations)) |
|---|
| 1634 | if subsum_aucs: |
|---|
| 1635 | if method == self.ByWeightedPairs: |
|---|
| 1636 | p_i = prob[classIndex] |
|---|
| 1637 | subsum_aucs = [x * p_i for x in subsum_aucs] |
|---|
| 1638 | usefulClassPairs += p_i |
|---|
| 1639 | else: |
|---|
| 1640 | usefulClassPairs += 1 |
|---|
| 1641 | sum_aucs = map(add, sum_aucs, subsum_aucs) |
|---|
| 1642 | |
|---|
| 1643 | if usefulClassPairs > 0: |
|---|
| 1644 | sum_aucs = [x / usefulClassPairs for x in sum_aucs] |
|---|
| 1645 | |
|---|
| 1646 | return sum_aucs |
|---|
| 1647 | |
|---|
| 1648 | # computes the average AUC over folds using "AUCcomputer" (AUC_i or AUC_ij) |
|---|
| 1649 | # it returns the sum of what is returned by the computer, |
|---|
| 1650 | # unless at a certain fold the computer has to resort to computing |
|---|
| 1651 | # over all folds or even this failed; |
|---|
| 1652 | # in these cases the result is returned immediately |
|---|
| 1653 | def _compute_for_multiple_folds(self, auc_computer, iterations, |
|---|
| 1654 | computer_args): |
|---|
| 1655 | """Compute the average AUC over folds using :obj:`auc_computer`.""" |
|---|
| 1656 | subsum_aucs = [0.] * iterations[0].number_of_learners |
|---|
| 1657 | for ite in iterations: |
|---|
| 1658 | aucs, foldsUsed = auc_computer(*(ite,) + computer_args) |
|---|
| 1659 | if not aucs: |
|---|
| 1660 | import warnings |
|---|
| 1661 | warnings.warn("AUC cannot be computed (all instances belong to the same class).") |
|---|
| 1662 | return |
|---|
| 1663 | if not foldsUsed: |
|---|
| 1664 | self[:] = aucs |
|---|
| 1665 | return aucs |
|---|
| 1666 | subsum_aucs = map(add, subsum_aucs, aucs) |
|---|
| 1667 | return subsum_aucs |
|---|
| 1668 | |
|---|
| 1669 | # Computes AUC |
|---|
| 1670 | # in multivalued class problem, AUC is computed as one against all |
|---|
| 1671 | # results over folds are averages |
|---|
| 1672 | # if some folds examples from one class only, the folds are merged |
|---|
| 1673 | def _compute_for_single_class(self, res, class_index): |
|---|
| 1674 | if res.number_of_iterations > 1: |
|---|
| 1675 | return self._compute_for_multiple_folds( |
|---|
| 1676 | self._compute_one_class_against_all, split_by_iterations(res), |
|---|
| 1677 | (class_index, res, res.number_of_iterations)) |
|---|
| 1678 | else: |
|---|
| 1679 | return self._compute_one_class_against_all(res, class_index)[0] |
|---|
| 1680 | |
|---|
| 1681 | # Computes AUC for a pair of classes (as if there were no other classes) |
|---|
| 1682 | # results over folds are averages |
|---|
| 1683 | # if some folds have examples from one class only, the folds are merged |
|---|
| 1684 | def _compute_for_pair_of_classes(self, res, class_index1, class_index2): |
|---|
| 1685 | if res.number_of_iterations > 1: |
|---|
| 1686 | return self._compute_for_multiple_folds( |
|---|
| 1687 | self._compute_one_class_against_another, |
|---|
| 1688 | split_by_iterations(res), |
|---|
| 1689 | (class_index1, class_index2, res, res.number_of_iterations)) |
|---|
| 1690 | else: |
|---|
| 1691 | return self._compute_one_class_against_another(res, class_index1, |
|---|
| 1692 | class_index2) |
|---|
| 1693 | |
|---|
| 1694 | # computes AUC between class i and the other classes |
|---|
| 1695 | # (treating them as the same class) |
|---|
| 1696 | @deprecated_keywords({"classIndex": "class_index", |
|---|
| 1697 | "divideByIfIte": "divide_by_if_ite"}) |
|---|
| 1698 | def _compute_one_class_against_all(self, ite, class_index, all_ite=None, |
|---|
| 1699 | divide_by_if_ite=1.0): |
|---|
| 1700 | """Compute AUC between class i and all the other classes)""" |
|---|
| 1701 | return self._compute_auc(corn.computeCDT, ite, all_ite, |
|---|
| 1702 | divide_by_if_ite, (class_index, not self.ignore_weights)) |
|---|
| 1703 | |
|---|
| 1704 | |
|---|
| 1705 | # computes AUC between classes i and j as if there are no other classes |
|---|
| 1706 | def _compute_one_class_against_another( |
|---|
| 1707 | self, ite, class_index1, class_index2, |
|---|
| 1708 | all_ite=None, divide_by_if_ite=1.): |
|---|
| 1709 | """ |
|---|
| 1710 | Compute AUC between classes i and j as if there are no other classes. |
|---|
| 1711 | """ |
|---|
| 1712 | return self._compute_auc(corn.computeCDTPair, ite, |
|---|
| 1713 | all_ite, divide_by_if_ite, |
|---|
| 1714 | (class_index1, class_index2, not self.ignore_weights)) |
|---|
| 1715 | |
|---|
| 1716 | # computes AUC using a specified 'cdtComputer' function |
|---|
| 1717 | # It tries to compute AUCs from 'ite' (examples from a single iteration) |
|---|
| 1718 | # and, if C+D+T=0, from 'all_ite' (entire test set). In the former case, |
|---|
| 1719 | # the AUCs are divided by 'divideByIfIte'. |
|---|
| 1720 | # Additional flag is returned which is True in the former case, |
|---|
| 1721 | # or False in the latter. |
|---|
| 1722 | @deprecated_keywords({"cdt_computer": "cdtComputer", |
|---|
| 1723 | "divideByIfIte": "divide_by_if_ite", |
|---|
| 1724 | "computerArgs": "computer_args"}) |
|---|
| 1725 | def _compute_auc(self, cdt_computer, ite, all_ite, divide_by_if_ite, |
|---|
| 1726 | computer_args): |
|---|
| 1727 | """ |
|---|
| 1728 | Compute AUC using a :obj:`cdt_computer`. |
|---|
| 1729 | """ |
|---|
| 1730 | cdts = cdt_computer(*(ite,) + computer_args) |
|---|
| 1731 | if not is_CDT_empty(cdts[0]): |
|---|
| 1732 | return [(cdt.C + cdt.T / 2) / (cdt.C + cdt.D + cdt.T) / |
|---|
| 1733 | divide_by_if_ite for cdt in cdts], True |
|---|
| 1734 | |
|---|
| 1735 | if all_ite: |
|---|
| 1736 | cdts = cdt_computer(*(all_ite,) + computer_args) |
|---|
| 1737 | if not is_CDT_empty(cdts[0]): |
|---|
| 1738 | return [(cdt.C + cdt.T / 2) / (cdt.C + cdt.D + cdt.T) |
|---|
| 1739 | for cdt in cdts], False |
|---|
| 1740 | |
|---|
| 1741 | return False, False |
|---|
| 1742 | |
|---|
| 1743 | class AUC_for_single_class(AUC): |
|---|
| 1744 | """ |
|---|
| 1745 | Compute AUC where the class with the given class_index is singled |
|---|
| 1746 | out and all other classes are treated as a single class. |
|---|
| 1747 | """ |
|---|
| 1748 | def __init__(self, test_results=None, class_index= -1, ignore_weights=False): |
|---|
| 1749 | if class_index < 0: |
|---|
| 1750 | if test_results and test_results.base_class >= 0: |
|---|
| 1751 | self.class_index = test_results.base_class |
|---|
| 1752 | else: |
|---|
| 1753 | self.class_index = 1 |
|---|
| 1754 | else: |
|---|
| 1755 | self.class_index = class_index |
|---|
| 1756 | |
|---|
| 1757 | super(AUC_for_single_class, self).__init__(test_results, ignore_weights=ignore_weights) |
|---|
| 1758 | |
|---|
| 1759 | @replace_discrete_probabilities_with_list(method=True) |
|---|
| 1760 | def __call__(self, test_results): |
|---|
| 1761 | return self._compute_for_single_class(test_results, self.class_index) |
|---|
| 1762 | |
|---|
| 1763 | |
|---|
| 1764 | class AUC_for_pair_of_classes(AUC): |
|---|
| 1765 | """ |
|---|
| 1766 | Computes AUC between a pair of classes, ignoring instances from all |
|---|
| 1767 | other classes. |
|---|
| 1768 | """ |
|---|
| 1769 | def __init__(self, test_results, class_index1, class_index2, ignore_weights=False): |
|---|
| 1770 | self.class_index1 = class_index1 |
|---|
| 1771 | self.class_index2 = class_index2 |
|---|
| 1772 | |
|---|
| 1773 | super(AUC_for_pair_of_classes, self).__init__(test_results, ignore_weights=ignore_weights) |
|---|
| 1774 | |
|---|
| 1775 | @replace_discrete_probabilities_with_list(method=True) |
|---|
| 1776 | def __call__(self, test_results): |
|---|
| 1777 | return self._compute_for_pair_of_classes(test_results, self.class_index1, self.class_index2) |
|---|
| 1778 | |
|---|
| 1779 | |
|---|
| 1780 | class AUC_matrix(AUC): |
|---|
| 1781 | """ |
|---|
| 1782 | Compute a (lower diagonal) matrix with AUCs for all pairs of classes. |
|---|
| 1783 | If there are empty classes, the corresponding elements in the matrix |
|---|
| 1784 | are -1. |
|---|
| 1785 | """ |
|---|
| 1786 | |
|---|
| 1787 | @replace_discrete_probabilities_with_list(method=True) |
|---|
| 1788 | def __call__(self, test_results): |
|---|
| 1789 | numberOfClasses = len(test_results.class_values) |
|---|
| 1790 | number_of_learners = test_results.number_of_learners |
|---|
| 1791 | if test_results.number_of_iterations > 1: |
|---|
| 1792 | iterations, all_ite = split_by_iterations(test_results), test_results |
|---|
| 1793 | else: |
|---|
| 1794 | iterations, all_ite = [test_results], None |
|---|
| 1795 | aucs = [[[] for _ in range(numberOfClasses)] |
|---|
| 1796 | for _ in range(number_of_learners)] |
|---|
| 1797 | for classIndex1 in range(numberOfClasses): |
|---|
| 1798 | for classIndex2 in range(classIndex1): |
|---|
| 1799 | pair_aucs = self._compute_for_multiple_folds( |
|---|
| 1800 | self._compute_one_class_against_another, iterations, |
|---|
| 1801 | (classIndex1, classIndex2, all_ite, |
|---|
| 1802 | test_results.number_of_iterations)) |
|---|
| 1803 | if pair_aucs: |
|---|
| 1804 | for lrn in range(number_of_learners): |
|---|
| 1805 | aucs[lrn][classIndex1].append(pair_aucs[lrn]) |
|---|
| 1806 | else: |
|---|
| 1807 | for lrn in range(number_of_learners): |
|---|
| 1808 | aucs[lrn][classIndex1].append(-1) |
|---|
| 1809 | return aucs |
|---|
| 1810 | |
|---|
| 1811 | #Backward compatibility |
|---|
| 1812 | @replace_use_weights |
|---|
| 1813 | @replace_discrete_probabilities_with_list(False) |
|---|
| 1814 | def AUC_binary(res, ignore_weights=False): |
|---|
| 1815 | auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights) |
|---|
| 1816 | auc._compute_for_binary_class(res) |
|---|
| 1817 | return auc |
|---|
| 1818 | |
|---|
| 1819 | @replace_use_weights |
|---|
| 1820 | @replace_discrete_probabilities_with_list(False) |
|---|
| 1821 | def AUC_multi(res, ignore_weights=False, method=0): |
|---|
| 1822 | auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights, |
|---|
| 1823 | method=method) |
|---|
| 1824 | auc._compute_for_multi_value_class(res) |
|---|
| 1825 | return auc |
|---|
| 1826 | |
|---|
| 1827 | |
|---|
| 1828 | @deprecated_keywords({"AUCcomputer": "auc_computer", |
|---|
| 1829 | "computerArgs": "computer_args"}) |
|---|
| 1830 | def AUC_iterations(auc_computer, iterations, computer_args): |
|---|
| 1831 | auc = deprecated_function_name(AUC)() |
|---|
| 1832 | auc._compute_for_multiple_folds(auc_computer, iterations, computer_args) |
|---|
| 1833 | return auc |
|---|
| 1834 | |
|---|
| 1835 | def AUC_x(cdtComputer, ite, all_ite, divide_by_if_ite, computer_args): |
|---|
| 1836 | auc = deprecated_function_name(AUC)() |
|---|
| 1837 | result = auc._compute_auc(cdtComputer, ite, all_ite, divide_by_if_ite, |
|---|
| 1838 | computer_args) |
|---|
| 1839 | return result |
|---|
| 1840 | |
|---|
| 1841 | @replace_use_weights |
|---|
| 1842 | def AUC_i(ite, class_index, ignore_weights=False, all_ite=None, |
|---|
| 1843 | divide_by_if_ite=1.): |
|---|
| 1844 | auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights) |
|---|
| 1845 | result = auc._compute_one_class_against_another(ite, class_index, |
|---|
| 1846 | all_ite=all_ite, divide_by_if_ite=divide_by_if_ite) |
|---|
| 1847 | return result |
|---|
| 1848 | |
|---|
| 1849 | |
|---|
| 1850 | @replace_use_weights |
|---|
| 1851 | def AUC_ij(ite, class_index1, class_index2, ignore_weights=False, |
|---|
| 1852 | all_ite=None, divide_by_if_ite=1.): |
|---|
| 1853 | auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights) |
|---|
| 1854 | result = auc._compute_one_class_against_another( |
|---|
| 1855 | ite, class_index1, class_index2, all_ite=all_ite, divide_by_if_ite=divide_by_if_ite) |
|---|
| 1856 | return result |
|---|
| 1857 | |
|---|
| 1858 | AUC_single = replace_use_weights( |
|---|
| 1859 | deprecated_keywords({"classIndex": "class_index"})( |
|---|
| 1860 | deprecated_function_name(AUC_for_single_class))) |
|---|
| 1861 | AUC_pair = replace_use_weights( |
|---|
| 1862 | deprecated_keywords({"classIndex1": "class_index1", |
|---|
| 1863 | "classIndex2": "class_index2"})( |
|---|
| 1864 | deprecated_function_name(AUC_for_pair_of_classes))) |
|---|
| 1865 | AUC_matrix = replace_use_weights(AUC_matrix) |
|---|
| 1866 | |
|---|
| 1867 | |
|---|
| 1868 | @deprecated_keywords({"unweighted": "ignore_weights"}) |
|---|
| 1869 | @replace_discrete_probabilities_with_list(False) |
|---|
| 1870 | def McNemar(res, ignore_weights=False, **argkw): |
|---|
| 1871 | """ |
|---|
| 1872 | Compute a triangular matrix with McNemar statistics for each pair of |
|---|
| 1873 | classifiers. The statistics is distributed by chi-square distribution with |
|---|
| 1874 | one degree of freedom; critical value for 5% significance is around 3.84. |
|---|
| 1875 | """ |
|---|
| 1876 | nLearners = res.number_of_learners |
|---|
| 1877 | mcm = [] |
|---|
| 1878 | for i in range(nLearners): |
|---|
| 1879 | mcm.append([0.] * res.number_of_learners) |
|---|
| 1880 | |
|---|
| 1881 | if not res.weights or ignore_weights: |
|---|
| 1882 | for i in res.results: |
|---|
| 1883 | actual = i.actual_class |
|---|
| 1884 | classes = i.classes |
|---|
| 1885 | for l1 in range(nLearners): |
|---|
| 1886 | for l2 in range(l1, nLearners): |
|---|
| 1887 | if classes[l1] == actual: |
|---|
| 1888 | if classes[l2] != actual: |
|---|
| 1889 | mcm[l1][l2] += 1 |
|---|
| 1890 | elif classes[l2] == actual: |
|---|
| 1891 | mcm[l2][l1] += 1 |
|---|
| 1892 | else: |
|---|
| 1893 | for i in res.results: |
|---|
| 1894 | actual = i.actual_class |
|---|
| 1895 | classes = i.classes |
|---|
| 1896 | for l1 in range(nLearners): |
|---|
| 1897 | for l2 in range(l1, nLearners): |
|---|
| 1898 | if classes[l1] == actual: |
|---|
| 1899 | if classes[l2] != actual: |
|---|
| 1900 | mcm[l1][l2] += i.weight |
|---|
| 1901 | elif classes[l2] == actual: |
|---|
| 1902 | mcm[l2][l1] += i.weight |
|---|
| 1903 | |
|---|
| 1904 | for l1 in range(nLearners): |
|---|
| 1905 | for l2 in range(l1, nLearners): |
|---|
| 1906 | su = mcm[l1][l2] + mcm[l2][l1] |
|---|
| 1907 | if su: |
|---|
| 1908 | mcm[l2][l1] = (abs(mcm[l1][l2] - mcm[l2][l1]) - 1) ** 2 / su |
|---|
| 1909 | else: |
|---|
| 1910 | mcm[l2][l1] = 0 |
|---|
| 1911 | |
|---|
| 1912 | for l1 in range(nLearners): |
|---|
| 1913 | mcm[l1] = mcm[l1][:l1] |
|---|
| 1914 | |
|---|
| 1915 | return mcm |
|---|
| 1916 | |
|---|
| 1917 | @replace_discrete_probabilities_with_list(False) |
|---|
| 1918 | def McNemar_of_two(res, lrn1, lrn2, ignore_weights=False): |
|---|
| 1919 | """ |
|---|
| 1920 | McNemar_of_two computes a McNemar statistics for a pair of classifier, |
|---|
| 1921 | specified by indices learner1 and learner2. |
|---|
| 1922 | """ |
|---|
| 1923 | tf = ft = 0. |
|---|
| 1924 | if not res.weights or ignore_weights: |
|---|
| 1925 | for i in res.results: |
|---|
| 1926 | actual = i.actual_class |
|---|
| 1927 | if i.classes[lrn1] == actual: |
|---|
| 1928 | if i.classes[lrn2] != actual: |
|---|
| 1929 | tf += i.weight |
|---|
| 1930 | elif i.classes[lrn2] == actual: |
|---|
| 1931 | ft += i.weight |
|---|
| 1932 | else: |
|---|
| 1933 | for i in res.results: |
|---|
| 1934 | actual = i.actual_class |
|---|
| 1935 | if i.classes[lrn1] == actual: |
|---|
| 1936 | if i.classes[lrn2] != actual: |
|---|
| 1937 | tf += 1. |
|---|
| 1938 | elif i.classes[lrn2] == actual: |
|---|
| 1939 | ft += 1. |
|---|
| 1940 | |
|---|
| 1941 | su = tf + ft |
|---|
| 1942 | if su: |
|---|
| 1943 | return (abs(tf - ft) - 1) ** 2 / su |
|---|
| 1944 | else: |
|---|
| 1945 | return 0 |
|---|
| 1946 | |
|---|
| 1947 | @replace_discrete_probabilities_with_list(False) |
|---|
| 1948 | def Friedman(res, stat=CA): |
|---|
| 1949 | """ |
|---|
| 1950 | Compare classifiers with Friedman test, treating folds as different examles. |
|---|
| 1951 | Returns F, p and average ranks. |
|---|
| 1952 | """ |
|---|
| 1953 | res_split = split_by_iterations(res) |
|---|
| 1954 | res = [stat(r) for r in res_split] |
|---|
| 1955 | |
|---|
| 1956 | N = len(res) |
|---|
| 1957 | k = len(res[0]) |
|---|
| 1958 | sums = [0.] * k |
|---|
| 1959 | for r in res: |
|---|
| 1960 | ranks = [k - x + 1 for x in statc.rankdata(r)] |
|---|
| 1961 | if stat == Brier_score: # reverse ranks for Brier_score (lower better) |
|---|
| 1962 | ranks = [k + 1 - x for x in ranks] |
|---|
| 1963 | sums = map(add, ranks, sums) |
|---|
| 1964 | |
|---|
| 1965 | T = sum(x * x for x in sums) |
|---|
| 1966 | sums = [x / N for x in sums] |
|---|
| 1967 | |
|---|
| 1968 | F = 12. / (N * k * (k + 1)) * T - 3 * N * (k + 1) |
|---|
| 1969 | |
|---|
| 1970 | return F, statc.chisqprob(F, k - 1), sums |
|---|
| 1971 | |
|---|
| 1972 | @replace_discrete_probabilities_with_list(False) |
|---|
| 1973 | def Wilcoxon_pairs(res, avgranks, stat=CA): |
|---|
| 1974 | """ |
|---|
| 1975 | Return a triangular matrix, where element[i][j] stores significance of |
|---|
| 1976 | difference between the i-th and the j-th classifier, as computed by the |
|---|
| 1977 | Wilcoxon test. The element is positive if the i-th is better than the j-th, |
|---|
| 1978 | negative if it is worse, and 1 if they are equal. |
|---|
| 1979 | Arguments are ExperimentResults, average ranks (as returned by Friedman) |
|---|
| 1980 | and, optionally, a statistics; greater values should mean better results. |
|---|
| 1981 | """ |
|---|
| 1982 | res_split = split_by_iterations(res) |
|---|
| 1983 | res = [stat(r) for r in res_split] |
|---|
| 1984 | |
|---|
| 1985 | k = len(res[0]) |
|---|
| 1986 | bt = [] |
|---|
| 1987 | for m1 in range(k): |
|---|
| 1988 | nl = [] |
|---|
| 1989 | for m2 in range(m1 + 1, k): |
|---|
| 1990 | t, p = statc.wilcoxont([r[m1] for r in res], [r[m2] for r in res]) |
|---|
| 1991 | if avgranks[m1] < avgranks[m2]: |
|---|
| 1992 | nl.append(p) |
|---|
| 1993 | elif avgranks[m2] < avgranks[m1]: |
|---|
| 1994 | nl.append(-p) |
|---|
| 1995 | else: |
|---|
| 1996 | nl.append(1) |
|---|
| 1997 | bt.append(nl) |
|---|
| 1998 | return bt |
|---|
| 1999 | |
|---|
| 2000 | |
|---|
| 2001 | @deprecated_keywords({"allResults": "all_results", |
|---|
| 2002 | "noConfidence": "no_confidence"}) |
|---|
| 2003 | def plot_learning_curve_learners(file, all_results, proportions, learners, |
|---|
| 2004 | no_confidence=0): |
|---|
| 2005 | plot_learning_curve(file, all_results, proportions, |
|---|
| 2006 | [Orange.misc.getobjectname(learners[i], "Learner %i" % i) |
|---|
| 2007 | for i in range(len(learners))], no_confidence) |
|---|
| 2008 | |
|---|
| 2009 | |
|---|
| 2010 | @deprecated_keywords({"allResults": "all_results", |
|---|
| 2011 | "noConfidence": "no_confidence"}) |
|---|
| 2012 | def plot_learning_curve(file, all_results, proportions, legend, |
|---|
| 2013 | no_confidence=0): |
|---|
| 2014 | import types |
|---|
| 2015 | fopened = 0 |
|---|
| 2016 | if type(file) == types.StringType: |
|---|
| 2017 | file = open(file, "wt") |
|---|
| 2018 | fopened = 1 |
|---|
| 2019 | |
|---|
| 2020 | file.write("set yrange [0:1]\n") |
|---|
| 2021 | file.write("set xrange [%f:%f]\n" % (proportions[0], proportions[-1])) |
|---|
| 2022 | file.write("set multiplot\n\n") |
|---|
| 2023 | CAs = [CA(x, report_se=True) for x in all_results] |
|---|
| 2024 | |
|---|
| 2025 | file.write("plot \\\n") |
|---|
| 2026 | for i in range(len(legend) - 1): |
|---|
| 2027 | if not no_confidence: |
|---|
| 2028 | file.write("'-' title '' with yerrorbars pointtype %i,\\\n" % (i + 1)) |
|---|
| 2029 | file.write("'-' title '%s' with linespoints pointtype %i,\\\n" % (legend[i], i + 1)) |
|---|
| 2030 | if not no_confidence: |
|---|
| 2031 | file.write("'-' title '' with yerrorbars pointtype %i,\\\n" % (len(legend))) |
|---|
| 2032 | file.write("'-' title '%s' with linespoints pointtype %i\n" % (legend[-1], len(legend))) |
|---|
| 2033 | |
|---|
| 2034 | for i in range(len(legend)): |
|---|
| 2035 | if not no_confidence: |
|---|
| 2036 | for p in range(len(proportions)): |
|---|
| 2037 | file.write("%f\t%f\t%f\n" % (proportions[p], CAs[p][i][0], 1.96 * CAs[p][i][1])) |
|---|
| 2038 | file.write("e\n\n") |
|---|
| 2039 | |
|---|
| 2040 | for p in range(len(proportions)): |
|---|
| 2041 | file.write("%f\t%f\n" % (proportions[p], CAs[p][i][0])) |
|---|
| 2042 | file.write("e\n\n") |
|---|
| 2043 | |
|---|
| 2044 | if fopened: |
|---|
| 2045 | file.close() |
|---|
| 2046 | |
|---|
| 2047 | |
|---|
| 2048 | def print_single_ROC_curve_coordinates(file, curve): |
|---|
| 2049 | import types |
|---|
| 2050 | fopened = 0 |
|---|
| 2051 | if type(file) == types.StringType: |
|---|
| 2052 | file = open(file, "wt") |
|---|
| 2053 | fopened = 1 |
|---|
| 2054 | |
|---|
| 2055 | for coord in curve: |
|---|
| 2056 | file.write("%5.3f\t%5.3f\n" % tuple(coord)) |
|---|
| 2057 | |
|---|
| 2058 | if fopened: |
|---|
| 2059 | file.close() |
|---|
| 2060 | |
|---|
| 2061 | |
|---|
| 2062 | def plot_ROC_learners(file, curves, learners): |
|---|
| 2063 | plot_ROC(file, curves, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))]) |
|---|
| 2064 | |
|---|
| 2065 | def plot_ROC(file, curves, legend): |
|---|
| 2066 | import types |
|---|
| 2067 | fopened = 0 |
|---|
| 2068 | if type(file) == types.StringType: |
|---|
| 2069 | file = open(file, "wt") |
|---|
| 2070 | fopened = 1 |
|---|
| 2071 | |
|---|
| 2072 | file.write("set yrange [0:1]\n") |
|---|
| 2073 | file.write("set xrange [0:1]\n") |
|---|
| 2074 | file.write("set multiplot\n\n") |
|---|
| 2075 | |
|---|
| 2076 | file.write("plot \\\n") |
|---|
| 2077 | for leg in legend: |
|---|
| 2078 | file.write("'-' title '%s' with lines,\\\n" % leg) |
|---|
| 2079 | file.write("'-' title '' with lines\n") |
|---|
| 2080 | |
|---|
| 2081 | for curve in curves: |
|---|
| 2082 | for coord in curve: |
|---|
| 2083 | file.write("%5.3f\t%5.3f\n" % tuple(coord)) |
|---|
| 2084 | file.write("e\n\n") |
|---|
| 2085 | |
|---|
| 2086 | file.write("1.0\t1.0\n0.0\t0.0e\n\n") |
|---|
| 2087 | |
|---|
| 2088 | if fopened: |
|---|
| 2089 | file.close() |
|---|
| 2090 | |
|---|
| 2091 | |
|---|
| 2092 | @deprecated_keywords({"allResults": "all_results"}) |
|---|
| 2093 | def plot_McNemar_curve_learners(file, all_results, proportions, learners, reference= -1): |
|---|
| 2094 | plot_McNemar_curve(file, all_results, proportions, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))], reference) |
|---|
| 2095 | |
|---|
| 2096 | |
|---|
| 2097 | @deprecated_keywords({"allResults": "all_results"}) |
|---|
| 2098 | def plot_McNemar_curve(file, all_results, proportions, legend, reference= -1): |
|---|
| 2099 | if reference < 0: |
|---|
| 2100 | reference = len(legend) - 1 |
|---|
| 2101 | |
|---|
| 2102 | import types |
|---|
| 2103 | fopened = 0 |
|---|
| 2104 | if type(file) == types.StringType: |
|---|
| 2105 | file = open(file, "wt") |
|---|
| 2106 | fopened = 1 |
|---|
| 2107 | |
|---|
| 2108 | #file.write("set yrange [0:1]\n") |
|---|
| 2109 | #file.write("set xrange [%f:%f]\n" % (proportions[0], proportions[-1])) |
|---|
| 2110 | file.write("set multiplot\n\n") |
|---|
| 2111 | file.write("plot \\\n") |
|---|
| 2112 | tmap = range(reference) + range(reference + 1, len(legend)) |
|---|
| 2113 | for i in tmap[:-1]: |
|---|
| 2114 | file.write("'-' title '%s' with linespoints pointtype %i,\\\n" % (legend[i], i + 1)) |
|---|
| 2115 | file.write("'-' title '%s' with linespoints pointtype %i\n" % (legend[tmap[-1]], tmap[-1])) |
|---|
| 2116 | file.write("\n") |
|---|
| 2117 | |
|---|
| 2118 | for i in tmap: |
|---|
| 2119 | for p in range(len(proportions)): |
|---|
| 2120 | file.write("%f\t%f\n" % (proportions[p], McNemar_of_two(all_results[p], i, reference))) |
|---|
| 2121 | file.write("e\n\n") |
|---|
| 2122 | |
|---|
| 2123 | if fopened: |
|---|
| 2124 | file.close() |
|---|
| 2125 | |
|---|
| 2126 | default_point_types = ("{$\\circ$}", "{$\\diamond$}", "{$+$}", "{$\\times$}", "{$|$}") + tuple([chr(x) for x in range(97, 122)]) |
|---|
| 2127 | default_line_types = ("\\setsolid", "\\setdashpattern <4pt, 2pt>", "\\setdashpattern <8pt, 2pt>", "\\setdashes", "\\setdots") |
|---|
| 2128 | |
|---|
| 2129 | @deprecated_keywords({"allResults": "all_results"}) |
|---|
| 2130 | def learning_curve_learners_to_PiCTeX(file, all_results, proportions, **options): |
|---|
| 2131 | return apply(learning_curve_to_PiCTeX, (file, all_results, proportions), options) |
|---|
| 2132 | |
|---|
| 2133 | |
|---|
| 2134 | @deprecated_keywords({"allResults": "all_results"}) |
|---|
| 2135 | def learning_curve_to_PiCTeX(file, all_results, proportions, **options): |
|---|
| 2136 | import types |
|---|
| 2137 | fopened = 0 |
|---|
| 2138 | if type(file) == types.StringType: |
|---|
| 2139 | file = open(file, "wt") |
|---|
| 2140 | fopened = 1 |
|---|
| 2141 | |
|---|
| 2142 | nexamples = len(all_results[0].results) |
|---|
| 2143 | CAs = [CA(x, report_se=True) for x in all_results] |
|---|
| 2144 | |
|---|
| 2145 | graphsize = float(options.get("graphsize", 10.0)) #cm |
|---|
| 2146 | difprop = proportions[-1] - proportions[0] |
|---|
| 2147 | ntestexamples = nexamples * proportions[-1] |
|---|
| 2148 | xunit = graphsize / ntestexamples |
|---|
| 2149 | |
|---|
| 2150 | yshift = float(options.get("yshift", -ntestexamples / 20.)) |
|---|
| 2151 | |
|---|
| 2152 | pointtypes = options.get("pointtypes", default_point_types) |
|---|
| 2153 | linetypes = options.get("linetypes", default_line_types) |
|---|
| 2154 | |
|---|
| 2155 | if options.has_key("numberedx"): |
|---|
| 2156 | numberedx = options["numberedx"] |
|---|
| 2157 | if type(numberedx) == types.IntType: |
|---|
| 2158 | if numberedx > 0: |
|---|
| 2159 | numberedx = [nexamples * proportions[int(i / float(numberedx) * len(proportions))] for i in range(numberedx)] + [proportions[-1] * nexamples] |
|---|
| 2160 | elif numberedx < 0: |
|---|
| 2161 | numberedx = -numberedx |
|---|
| 2162 | newn = [] |
|---|
| 2163 | for i in range(numberedx + 1): |
|---|
| 2164 | wanted = proportions[0] + float(i) / numberedx * difprop |
|---|
| 2165 | best = (10, 0) |
|---|
| 2166 | for t in proportions: |
|---|
| 2167 | td = abs(wanted - t) |
|---|
| 2168 | if td < best[0]: |
|---|
| 2169 | best = (td, t) |
|---|
| 2170 | if not best[1] in newn: |
|---|
| 2171 | newn.append(best[1]) |
|---|
| 2172 | newn.sort() |
|---|
| 2173 | numberedx = [nexamples * x for x in newn] |
|---|
| 2174 | elif type(numberedx[0]) == types.FloatType: |
|---|
| 2175 | numberedx = [nexamples * x for x in numberedx] |
|---|
| 2176 | else: |
|---|
| 2177 | numberedx = [nexamples * x for x in proportions] |
|---|
| 2178 | |
|---|
| 2179 | file.write("\\mbox{\n") |
|---|
| 2180 | file.write(" \\beginpicture\n") |
|---|
| 2181 | file.write(" \\setcoordinatesystem units <%10.8fcm, %5.3fcm>\n\n" % (xunit, graphsize)) |
|---|
| 2182 | file.write(" \\setplotarea x from %5.3f to %5.3f, y from 0 to 1\n" % (0, ntestexamples)) |
|---|
| 2183 | file.write(" \\axis bottom invisible\n")# label {#examples}\n") |
|---|
| 2184 | file.write(" ticks short at %s /\n" % reduce(lambda x, y:x + " " + y, ["%i" % (x * nexamples + 0.5) for x in proportions])) |
|---|
| 2185 | if numberedx: |
|---|
| 2186 | file.write(" long numbered at %s /\n" % reduce(lambda x, y:x + y, ["%i " % int(x + 0.5) for x in numberedx])) |
|---|
| 2187 | file.write(" /\n") |
|---|
| 2188 | file.write(" \\axis left invisible\n")# label {classification accuracy}\n") |
|---|
| 2189 | file.write(" shiftedto y=%5.3f\n" % yshift) |
|---|
| 2190 | file.write(" ticks short from 0.0 to 1.0 by 0.05\n") |
|---|
| 2191 | file.write(" long numbered from 0.0 to 1.0 by 0.25\n") |
|---|
| 2192 | file.write(" /\n") |
|---|
| 2193 | if options.has_key("default"): |
|---|
| 2194 | file.write(" \\setdashpattern<1pt, 1pt>\n") |
|---|
| 2195 | file.write(" \\plot %5.3f %5.3f %5.3f %5.3f /\n" % (0., options["default"], ntestexamples, options["default"])) |
|---|
| 2196 | |
|---|
| 2197 | for i in range(len(CAs[0])): |
|---|
| 2198 | coordinates = reduce(lambda x, y:x + " " + y, ["%i %5.3f" % (proportions[p] * nexamples, CAs[p][i][0]) for p in range(len(proportions))]) |
|---|
| 2199 | if linetypes: |
|---|
| 2200 | file.write(" %s\n" % linetypes[i]) |
|---|
| 2201 | file.write(" \\plot %s /\n" % coordinates) |
|---|
| 2202 | if pointtypes: |
|---|
| 2203 | file.write(" \\multiput %s at %s /\n" % (pointtypes[i], coordinates)) |
|---|
| 2204 | |
|---|
| 2205 | file.write(" \\endpicture\n") |
|---|
| 2206 | file.write("}\n") |
|---|
| 2207 | if fopened: |
|---|
| 2208 | file.close() |
|---|
| 2209 | file.close() |
|---|
| 2210 | del file |
|---|
| 2211 | |
|---|
| 2212 | def legend_learners_to_PiCTeX(file, learners, **options): |
|---|
| 2213 | return apply(legend_to_PiCTeX, (file, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))]), options) |
|---|
| 2214 | |
|---|
| 2215 | def legend_to_PiCTeX(file, legend, **options): |
|---|
| 2216 | import types |
|---|
| 2217 | fopened = 0 |
|---|
| 2218 | if type(file) == types.StringType: |
|---|
| 2219 | file = open(file, "wt") |
|---|
| 2220 | fopened = 1 |
|---|
| 2221 | |
|---|
| 2222 | pointtypes = options.get("pointtypes", default_point_types) |
|---|
| 2223 | linetypes = options.get("linetypes", default_line_types) |
|---|
| 2224 | |
|---|
| 2225 | file.write("\\mbox{\n") |
|---|
| 2226 | file.write(" \\beginpicture\n") |
|---|
| 2227 | file.write(" \\setcoordinatesystem units <5cm, 1pt>\n\n") |
|---|
| 2228 | file.write(" \\setplotarea x from 0.000 to %5.3f, y from 0 to 12\n" % len(legend)) |
|---|
| 2229 | |
|---|
| 2230 | for i in range(len(legend)): |
|---|
| 2231 | if linetypes: |
|---|
| 2232 | file.write(" %s\n" % linetypes[i]) |
|---|
| 2233 | file.write(" \\plot %5.3f 6 %5.3f 6 /\n" % (i, i + 0.2)) |
|---|
| 2234 | if pointtypes: |
|---|
| 2235 | file.write(" \\put {%s} at %5.3f 6\n" % (pointtypes[i], i + 0.1)) |
|---|
| 2236 | file.write(" \\put {%s} [lb] at %5.3f 0\n" % (legend[i], i + 0.25)) |
|---|
| 2237 | |
|---|
| 2238 | file.write(" \\endpicture\n") |
|---|
| 2239 | file.write("}\n") |
|---|
| 2240 | if fopened: |
|---|
| 2241 | file.close() |
|---|
| 2242 | file.close() |
|---|
| 2243 | del file |
|---|
| 2244 | |
|---|
| 2245 | |
|---|
| 2246 | def compute_friedman(avranks, N): |
|---|
| 2247 | """ Returns a tuple composed of (friedman statistic, degrees of freedom) |
|---|
| 2248 | and (Iman statistic - F-distribution, degrees of freedoma) given average |
|---|
| 2249 | ranks and a number of tested data sets N. |
|---|
| 2250 | """ |
|---|
| 2251 | |
|---|
| 2252 | k = len(avranks) |
|---|
| 2253 | |
|---|
| 2254 | def friedman(N, k, ranks): |
|---|
| 2255 | return 12 * N * (sum([rank ** 2.0 for rank in ranks]) - (k * (k + 1) * (k + 1) / 4.0)) / (k * (k + 1)) |
|---|
| 2256 | |
|---|
| 2257 | def iman(fried, N, k): |
|---|
| 2258 | return (N - 1) * fried / (N * (k - 1) - fried) |
|---|
| 2259 | |
|---|
| 2260 | f = friedman(N, k, avranks) |
|---|
| 2261 | im = iman(f, N, k) |
|---|
| 2262 | fdistdof = (k - 1, (k - 1) * (N - 1)) |
|---|
| 2263 | |
|---|
| 2264 | return (f, k - 1), (im, fdistdof) |
|---|
| 2265 | |
|---|
| 2266 | def compute_CD(avranks, N, alpha="0.05", type="nemenyi"): |
|---|
| 2267 | """ Returns critical difference for Nemenyi or Bonferroni-Dunn test |
|---|
| 2268 | according to given alpha (either alpha="0.05" or alpha="0.1") for average |
|---|
| 2269 | ranks and number of tested data sets N. Type can be either "nemenyi" for |
|---|
| 2270 | for Nemenyi two tailed test or "bonferroni-dunn" for Bonferroni-Dunn test. |
|---|
| 2271 | """ |
|---|
| 2272 | |
|---|
| 2273 | k = len(avranks) |
|---|
| 2274 | |
|---|
| 2275 | d = {("nemenyi", "0.05"): [0, 0, 1.959964, 2.343701, 2.569032, 2.727774, |
|---|
| 2276 | 2.849705, 2.94832, 3.030879, 3.101730, 3.163684, |
|---|
| 2277 | 3.218654, 3.268004, 3.312739, 3.353618, 3.39123, |
|---|
| 2278 | 3.426041, 3.458425, 3.488685, 3.517073, 3.543799] |
|---|
| 2279 | , ("nemenyi", "0.1"): [0, 0, 1.644854, 2.052293, 2.291341, 2.459516, |
|---|
| 2280 | 2.588521, 2.692732, 2.779884, 2.854606, 2.919889, |
|---|
| 2281 | 2.977768, 3.029694, 3.076733, 3.119693, 3.159199, |
|---|
| 2282 | 3.195743, 3.229723, 3.261461, 3.291224, 3.319233] |
|---|
| 2283 | , ("bonferroni-dunn", "0.05"): [0, 0, 1.960, 2.241, 2.394, 2.498, 2.576, |
|---|
| 2284 | 2.638, 2.690, 2.724, 2.773], |
|---|
| 2285 | ("bonferroni-dunn", "0.1"): [0, 0, 1.645, 1.960, 2.128, 2.241, 2.326, |
|---|
| 2286 | 2.394, 2.450, 2.498, 2.539]} |
|---|
| 2287 | |
|---|
| 2288 | #can be computed in R as qtukey(0.95, n, Inf)**0.5 |
|---|
| 2289 | #for (x in c(2:20)) print(qtukey(0.95, x, Inf)/(2**0.5) |
|---|
| 2290 | |
|---|
| 2291 | q = d[(type, alpha)] |
|---|
| 2292 | |
|---|
| 2293 | cd = q[k] * (k * (k + 1) / (6.0 * N)) ** 0.5 |
|---|
| 2294 | |
|---|
| 2295 | return cd |
|---|
| 2296 | |
|---|
| 2297 | |
|---|
| 2298 | def graph_ranks(filename, avranks, names, cd=None, cdmethod=None, lowv=None, highv=None, width=6, textspace=1, reverse=False, **kwargs): |
|---|
| 2299 | """ |
|---|
| 2300 | Draws a CD graph, which is used to display the differences in methods' |
|---|
| 2301 | performance. |
|---|
| 2302 | See Janez Demsar, Statistical Comparisons of Classifiers over |
|---|
| 2303 | Multiple Data Sets, 7(Jan):1--30, 2006. |
|---|
| 2304 | |
|---|
| 2305 | Needs matplotlib to work. |
|---|
| 2306 | |
|---|
| 2307 | :param filename: Output file name (with extension). Formats supported |
|---|
| 2308 | by matplotlib can be used. |
|---|
| 2309 | :param avranks: List of average methods' ranks. |
|---|
| 2310 | :param names: List of methods' names. |
|---|
| 2311 | |
|---|
| 2312 | :param cd: Critical difference. Used for marking methods that whose |
|---|
| 2313 | difference is not statistically significant. |
|---|
| 2314 | :param lowv: The lowest shown rank, if None, use 1. |
|---|
| 2315 | :param highv: The highest shown rank, if None, use len(avranks). |
|---|
| 2316 | :param width: Width of the drawn figure in inches, default 6 in. |
|---|
| 2317 | :param textspace: Space on figure sides left for the description |
|---|
| 2318 | of methods, default 1 in. |
|---|
| 2319 | :param reverse: If True, the lowest rank is on the right. Default\: False. |
|---|
| 2320 | :param cdmethod: None by default. It can be an index of element in avranks |
|---|
| 2321 | or or names which specifies the method which should be |
|---|
| 2322 | marked with an interval. |
|---|
| 2323 | """ |
|---|
| 2324 | if not HAS_MATPLOTLIB: |
|---|
| 2325 | import sys |
|---|
| 2326 | print >> sys.stderr, "Function requires matplotlib. Please install it." |
|---|
| 2327 | return |
|---|
| 2328 | |
|---|
| 2329 | width = float(width) |
|---|
| 2330 | textspace = float(textspace) |
|---|
| 2331 | |
|---|
| 2332 | def nth(l, n): |
|---|
| 2333 | """ |
|---|
| 2334 | Returns only nth elemnt in a list. |
|---|
| 2335 | """ |
|---|
| 2336 | n = lloc(l, n) |
|---|
| 2337 | return [ a[n] for a in l ] |
|---|
| 2338 | |
|---|
| 2339 | def lloc(l, n): |
|---|
| 2340 | """ |
|---|
| 2341 | List location in list of list structure. |
|---|
| 2342 | Enable the use of negative locations: |
|---|
| 2343 | -1 is the last element, -2 second last... |
|---|
| 2344 | """ |
|---|
| 2345 | if n < 0: |
|---|
| 2346 | return len(l[0]) + n |
|---|
| 2347 | else: |
|---|
| 2348 | return n |
|---|
| 2349 | |
|---|
| 2350 | def mxrange(lr): |
|---|
| 2351 | """ |
|---|
| 2352 | Multiple xranges. Can be used to traverse matrices. |
|---|
| 2353 | This function is very slow due to unknown number of |
|---|
| 2354 | parameters. |
|---|
| 2355 | |
|---|
| 2356 | >>> mxrange([3,5]) |
|---|
| 2357 | [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)] |
|---|
| 2358 | |
|---|
| 2359 | >>> mxrange([[3,5,1],[9,0,-3]]) |
|---|
| 2360 | [(3, 9), (3, 6), (3, 3), (4, 9), (4, 6), (4, 3)] |
|---|
| 2361 | |
|---|
| 2362 | """ |
|---|
| 2363 | if not len(lr): |
|---|
| 2364 | yield () |
|---|
| 2365 | else: |
|---|
| 2366 | #it can work with single numbers |
|---|
| 2367 | index = lr[0] |
|---|
| 2368 | if type(1) == type(index): |
|---|
| 2369 | index = [ index ] |
|---|
| 2370 | for a in range(*index): |
|---|
| 2371 | for b in mxrange(lr[1:]): |
|---|
| 2372 | yield tuple([a] + list(b)) |
|---|
| 2373 | |
|---|
| 2374 | |
|---|
| 2375 | from matplotlib.figure import Figure |
|---|
| 2376 | from matplotlib.backends.backend_agg import FigureCanvasAgg |
|---|
| 2377 | |
|---|
| 2378 | def print_figure(fig, *args, **kwargs): |
|---|
| 2379 | canvas = FigureCanvasAgg(fig) |
|---|
| 2380 | canvas.print_figure(*args, **kwargs) |
|---|
| 2381 | |
|---|
| 2382 | sums = avranks |
|---|
| 2383 | |
|---|
| 2384 | tempsort = sorted([ (a, i) for i, a in enumerate(sums) ], reverse=reverse) |
|---|
| 2385 | ssums = nth(tempsort, 0) |
|---|
| 2386 | sortidx = nth(tempsort, 1) |
|---|
| 2387 | nnames = [ names[x] for x in sortidx ] |
|---|
| 2388 | |
|---|
| 2389 | if lowv is None: |
|---|
| 2390 | lowv = min(1, int(math.floor(min(ssums)))) |
|---|
| 2391 | if highv is None: |
|---|
| 2392 | highv = max(len(avranks), int(math.ceil(max(ssums)))) |
|---|
| 2393 | |
|---|
| 2394 | cline = 0.4 |
|---|
| 2395 | |
|---|
| 2396 | k = len(sums) |
|---|
| 2397 | |
|---|
| 2398 | lines = None |
|---|
| 2399 | |
|---|
| 2400 | linesblank = 0 |
|---|
| 2401 | scalewidth = width - 2 * textspace |
|---|
| 2402 | |
|---|
| 2403 | def rankpos(rank): |
|---|
| 2404 | if not reverse: |
|---|
| 2405 | a = rank - lowv |
|---|
| 2406 | else: |
|---|
| 2407 | a = highv - rank |
|---|
| 2408 | return textspace + scalewidth / (highv - lowv) * a |
|---|
| 2409 | |
|---|
| 2410 | distanceh = 0.25 |
|---|
| 2411 | |
|---|
| 2412 | if cd and cdmethod is None: |
|---|
| 2413 | |
|---|
| 2414 | #get pairs of non significant methods |
|---|
| 2415 | |
|---|
| 2416 | def get_lines(sums, hsd): |
|---|
| 2417 | |
|---|
| 2418 | #get all pairs |
|---|
| 2419 | lsums = len(sums) |
|---|
| 2420 | allpairs = [ (i, j) for i, j in mxrange([[lsums], [lsums]]) if j > i ] |
|---|
| 2421 | #remove not significant |
|---|
| 2422 | notSig = [ (i, j) for i, j in allpairs if abs(sums[i] - sums[j]) <= hsd ] |
|---|
| 2423 | #keep only longest |
|---|
| 2424 | |
|---|
| 2425 | def no_longer((i, j), notSig): |
|---|
| 2426 | for i1, j1 in notSig: |
|---|
| 2427 | if (i1 <= i and j1 > j) or (i1 < i and j1 >= j): |
|---|
| 2428 | return False |
|---|
| 2429 | return True |
|---|
| 2430 | |
|---|
| 2431 | longest = [ (i, j) for i, j in notSig if no_longer((i, j), notSig) ] |
|---|
| 2432 | |
|---|
| 2433 | return longest |
|---|
| 2434 | |
|---|
| 2435 | lines = get_lines(ssums, cd) |
|---|
| 2436 | linesblank = 0.2 + 0.2 + (len(lines) - 1) * 0.1 |
|---|
| 2437 | |
|---|
| 2438 | #add scale |
|---|
| 2439 | distanceh = 0.25 |
|---|
| 2440 | cline += distanceh |
|---|
| 2441 | |
|---|
| 2442 | #calculate height needed height of an image |
|---|
| 2443 | minnotsignificant = max(2 * 0.2, linesblank) |
|---|
| 2444 | height = cline + ((k + 1) / 2) * 0.2 + minnotsignificant |
|---|
| 2445 | |
|---|
| 2446 | fig = Figure(figsize=(width, height)) |
|---|
| 2447 | ax = fig.add_axes([0, 0, 1, 1]) #reverse y axis |
|---|
| 2448 | ax.set_axis_off() |
|---|
| 2449 | |
|---|
| 2450 | hf = 1. / height # height factor |
|---|
| 2451 | wf = 1. / width |
|---|
| 2452 | |
|---|
| 2453 | def hfl(l): |
|---|
| 2454 | return [ a * hf for a in l ] |
|---|
| 2455 | |
|---|
| 2456 | def wfl(l): |
|---|
| 2457 | return [ a * wf for a in l ] |
|---|
| 2458 | |
|---|
| 2459 | |
|---|
| 2460 | # Upper left corner is (0,0). |
|---|
| 2461 | |
|---|
| 2462 | ax.plot([0, 1], [0, 1], c="w") |
|---|
| 2463 | ax.set_xlim(0, 1) |
|---|
| 2464 | ax.set_ylim(1, 0) |
|---|
| 2465 | |
|---|
| 2466 | def line(l, color='k', **kwargs): |
|---|
| 2467 | """ |
|---|
| 2468 | Input is a list of pairs of points. |
|---|
| 2469 | """ |
|---|
| 2470 | ax.plot(wfl(nth(l, 0)), hfl(nth(l, 1)), color=color, **kwargs) |
|---|
| 2471 | |
|---|
| 2472 | def text(x, y, s, *args, **kwargs): |
|---|
| 2473 | ax.text(wf * x, hf * y, s, *args, **kwargs) |
|---|
| 2474 | |
|---|
| 2475 | line([(textspace, cline), (width - textspace, cline)], linewidth=0.7) |
|---|
| 2476 | |
|---|
| 2477 | bigtick = 0.1 |
|---|
| 2478 | smalltick = 0.05 |
|---|
| 2479 | |
|---|
| 2480 | |
|---|
| 2481 | import numpy |
|---|
| 2482 | tick = None |
|---|
| 2483 | for a in list(numpy.arange(lowv, highv, 0.5)) + [highv]: |
|---|
| 2484 | tick = smalltick |
|---|
| 2485 | if a == int(a): tick = bigtick |
|---|
| 2486 | line([(rankpos(a), cline - tick / 2), (rankpos(a), cline)], linewidth=0.7) |
|---|
| 2487 | |
|---|
| 2488 | for a in range(lowv, highv + 1): |
|---|
| 2489 | text(rankpos(a), cline - tick / 2 - 0.05, str(a), ha="center", va="bottom") |
|---|
| 2490 | |
|---|
| 2491 | k = len(ssums) |
|---|
| 2492 | |
|---|
| 2493 | for i in range((k + 1) / 2): |
|---|
| 2494 | chei = cline + minnotsignificant + i * 0.2 |
|---|
| 2495 | line([(rankpos(ssums[i]), cline), (rankpos(ssums[i]), chei), (textspace - 0.1, chei)], linewidth=0.7) |
|---|
| 2496 | text(textspace - 0.2, chei, nnames[i], ha="right", va="center") |
|---|
| 2497 | |
|---|
| 2498 | for i in range((k + 1) / 2, k): |
|---|
| 2499 | chei = cline + minnotsignificant + (k - i - 1) * 0.2 |
|---|
| 2500 | line([(rankpos(ssums[i]), cline), (rankpos(ssums[i]), chei), (textspace + scalewidth + 0.1, chei)], linewidth=0.7) |
|---|
| 2501 | text(textspace + scalewidth + 0.2, chei, nnames[i], ha="left", va="center") |
|---|
| 2502 | |
|---|
| 2503 | if cd and cdmethod is None: |
|---|
| 2504 | |
|---|
| 2505 | #upper scale |
|---|
| 2506 | if not reverse: |
|---|
| 2507 | begin, end = rankpos(lowv), rankpos(lowv + cd) |
|---|
| 2508 | else: |
|---|
| 2509 | begin, end = rankpos(highv), rankpos(highv - cd) |
|---|
| 2510 | |
|---|
| 2511 | line([(begin, distanceh), (end, distanceh)], linewidth=0.7) |
|---|
| 2512 | line([(begin, distanceh + bigtick / 2), (begin, distanceh - bigtick / 2)], linewidth=0.7) |
|---|
| 2513 | line([(end, distanceh + bigtick / 2), (end, distanceh - bigtick / 2)], linewidth=0.7) |
|---|
| 2514 | text((begin + end) / 2, distanceh - 0.05, "CD", ha="center", va="bottom") |
|---|
| 2515 | |
|---|
| 2516 | #non significance lines |
|---|
| 2517 | def draw_lines(lines, side=0.05, height=0.1): |
|---|
| 2518 | start = cline + 0.2 |
|---|
| 2519 | for l, r in lines: |
|---|
| 2520 | line([(rankpos(ssums[l]) - side, start), (rankpos(ssums[r]) + side, start)], linewidth=2.5) |
|---|
| 2521 | start += height |
|---|
| 2522 | |
|---|
| 2523 | draw_lines(lines) |
|---|
| 2524 | |
|---|
| 2525 | elif cd: |
|---|
| 2526 | begin = rankpos(avranks[cdmethod] - cd) |
|---|
| 2527 | end = rankpos(avranks[cdmethod] + cd) |
|---|
| 2528 | line([(begin, cline), (end, cline)], linewidth=2.5) |
|---|
| 2529 | line([(begin, cline + bigtick / 2), (begin, cline - bigtick / 2)], linewidth=2.5) |
|---|
| 2530 | line([(end, cline + bigtick / 2), (end, cline - bigtick / 2)], linewidth=2.5) |
|---|
| 2531 | |
|---|
| 2532 | print_figure(fig, filename, **kwargs) |
|---|
| 2533 | |
|---|
| 2534 | def mlc_hamming_loss(res): |
|---|
| 2535 | """ |
|---|
| 2536 | Schapire and Singer (2000) presented Hamming Loss, which id defined as: |
|---|
| 2537 | |
|---|
| 2538 | :math:`HammingLoss(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{Y_i \\vartriangle Z_i}{|L|}` |
|---|
| 2539 | """ |
|---|
| 2540 | losses = [0] * res.number_of_learners |
|---|
| 2541 | label_num = len(res.labels) |
|---|
| 2542 | example_num = gettotsize(res) |
|---|
| 2543 | |
|---|
| 2544 | for e in res.results: |
|---|
| 2545 | aclass = e.actual_class |
|---|
| 2546 | for i, labels in enumerate(e.classes): |
|---|
| 2547 | labels = map(int, labels) |
|---|
| 2548 | if len(labels) <> len(aclass): |
|---|
| 2549 | raise ValueError, "The dimensions of the classified output and the actual class array do not match." |
|---|
| 2550 | for j in range(label_num): |
|---|
| 2551 | if labels[j] != aclass[j]: |
|---|
| 2552 | losses[i] += 1 |
|---|
| 2553 | |
|---|
| 2554 | return [float(x) / (label_num * example_num) for x in losses] |
|---|
| 2555 | |
|---|
| 2556 | def mlc_accuracy(res, forgiveness_rate=1.0): |
|---|
| 2557 | """ |
|---|
| 2558 | Godbole & Sarawagi, 2004 uses the metrics accuracy, precision, recall as follows: |
|---|
| 2559 | |
|---|
| 2560 | :math:`Accuracy(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Y_i \\cup Z_i|}` |
|---|
| 2561 | |
|---|
| 2562 | Boutell et al. (2004) give a more generalized version using a parameter :math:`\\alpha \\ge 0`, |
|---|
| 2563 | called forgiveness rate: |
|---|
| 2564 | |
|---|
| 2565 | :math:`Accuracy(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} (\\frac{|Y_i \\cap Z_i|}{|Y_i \\cup Z_i|})^{\\alpha}` |
|---|
| 2566 | """ |
|---|
| 2567 | accuracies = [0.0] * res.number_of_learners |
|---|
| 2568 | example_num = gettotsize(res) |
|---|
| 2569 | |
|---|
| 2570 | for e in res.results: |
|---|
| 2571 | aclass = e.actual_class |
|---|
| 2572 | for i, labels in enumerate(e.classes): |
|---|
| 2573 | labels = map(int, labels) |
|---|
| 2574 | if len(labels) <> len(aclass): |
|---|
| 2575 | raise ValueError, "The dimensions of the classified output and the actual class array do not match." |
|---|
| 2576 | |
|---|
| 2577 | intersection = 0.0 |
|---|
| 2578 | union = 0.0 |
|---|
| 2579 | for real, pred in zip(labels, aclass): |
|---|
| 2580 | if real and pred: |
|---|
| 2581 | intersection += 1 |
|---|
| 2582 | if real or pred: |
|---|
| 2583 | union += 1 |
|---|
| 2584 | |
|---|
| 2585 | if union: |
|---|
| 2586 | accuracies[i] += intersection / union |
|---|
| 2587 | |
|---|
| 2588 | return [math.pow(x / example_num, forgiveness_rate) for x in accuracies] |
|---|
| 2589 | |
|---|
| 2590 | def mlc_precision(res): |
|---|
| 2591 | """ |
|---|
| 2592 | :math:`Precision(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Z_i|}` |
|---|
| 2593 | """ |
|---|
| 2594 | precisions = [0.0] * res.number_of_learners |
|---|
| 2595 | example_num = gettotsize(res) |
|---|
| 2596 | |
|---|
| 2597 | for e in res.results: |
|---|
| 2598 | aclass = e.actual_class |
|---|
| 2599 | for i, labels in enumerate(e.classes): |
|---|
| 2600 | labels = map(int, labels) |
|---|
| 2601 | if len(labels) <> len(aclass): |
|---|
| 2602 | raise ValueError, "The dimensions of the classified output and the actual class array do not match." |
|---|
| 2603 | |
|---|
| 2604 | intersection = 0.0 |
|---|
| 2605 | predicted = 0.0 |
|---|
| 2606 | for real, pred in zip(labels, aclass): |
|---|
| 2607 | if real and pred: |
|---|
| 2608 | intersection += 1 |
|---|
| 2609 | if real: |
|---|
| 2610 | predicted += 1 |
|---|
| 2611 | if predicted: |
|---|
| 2612 | precisions[i] += intersection / predicted |
|---|
| 2613 | |
|---|
| 2614 | return [x / example_num for x in precisions] |
|---|
| 2615 | |
|---|
| 2616 | def mlc_recall(res): |
|---|
| 2617 | """ |
|---|
| 2618 | :math:`Recall(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Y_i|}` |
|---|
| 2619 | """ |
|---|
| 2620 | recalls = [0.0] * res.number_of_learners |
|---|
| 2621 | example_num = gettotsize(res) |
|---|
| 2622 | |
|---|
| 2623 | for e in res.results: |
|---|
| 2624 | aclass = e.actual_class |
|---|
| 2625 | for i, labels in enumerate(e.classes): |
|---|
| 2626 | labels = map(int, labels) |
|---|
| 2627 | if len(labels) <> len(aclass): |
|---|
| 2628 | raise ValueError, "The dimensions of the classified output and the actual class array do not match." |
|---|
| 2629 | |
|---|
| 2630 | intersection = 0.0 |
|---|
| 2631 | actual = 0.0 |
|---|
| 2632 | for real, pred in zip(labels, aclass): |
|---|
| 2633 | if real and pred: |
|---|
| 2634 | intersection += 1 |
|---|
| 2635 | if pred: |
|---|
| 2636 | actual += 1 |
|---|
| 2637 | if actual: |
|---|
| 2638 | recalls[i] += intersection / actual |
|---|
| 2639 | |
|---|
| 2640 | return [x / example_num for x in recalls] |
|---|
| 2641 | |
|---|
| 2642 | #def mlc_ranking_loss(res): |
|---|
| 2643 | # pass |
|---|
| 2644 | # |
|---|
| 2645 | #def mlc_average_precision(res): |
|---|
| 2646 | # pass |
|---|
| 2647 | # |
|---|
| 2648 | #def mlc_hierarchical_loss(res): |
|---|
| 2649 | # pass |
|---|
| 2650 | |
|---|
| 2651 | |
|---|
| 2652 | def mt_average_score(res, score, weights=None): |
|---|
| 2653 | """ |
|---|
| 2654 | Compute individual scores for each target and return the (weighted) average. |
|---|
| 2655 | |
|---|
| 2656 | One method can be used to compute scores for all targets or a list of |
|---|
| 2657 | scoring methods can be passed to use different methods for different |
|---|
| 2658 | targets. In the latter case, care has to be taken if the ranges of scoring |
|---|
| 2659 | methods differ. |
|---|
| 2660 | For example, when the first target is scored from -1 to 1 (1 best) and the |
|---|
| 2661 | second from 0 to 1 (0 best), using `weights=[0.5,-1]` would scale both |
|---|
| 2662 | to a span of 1, and invert the second so that higher scores are better. |
|---|
| 2663 | |
|---|
| 2664 | :param score: Single-target scoring method or a list of such methods |
|---|
| 2665 | (one for each target). |
|---|
| 2666 | :param weights: List of real weights, one for each target, |
|---|
| 2667 | for a weighted average. |
|---|
| 2668 | |
|---|
| 2669 | """ |
|---|
| 2670 | if not len(res.results): |
|---|
| 2671 | raise ValueError, "Cannot compute the score: no examples." |
|---|
| 2672 | if res.number_of_learners < 1: |
|---|
| 2673 | return [] |
|---|
| 2674 | n_classes = len(res.results[0].actual_class) |
|---|
| 2675 | if weights is None: |
|---|
| 2676 | weights = [1.] * n_classes |
|---|
| 2677 | if not isinstance(score, Iterable): |
|---|
| 2678 | score = [score] * n_classes |
|---|
| 2679 | elif len(score) != n_classes: |
|---|
| 2680 | raise ValueError, "Number of scoring methods and targets do not match." |
|---|
| 2681 | # save original classes |
|---|
| 2682 | clsss = [te.classes for te in res.results] |
|---|
| 2683 | aclsss = [te.actual_class for te in res.results] |
|---|
| 2684 | # compute single target scores |
|---|
| 2685 | single_scores = [] |
|---|
| 2686 | for i in range(n_classes): |
|---|
| 2687 | for te, clss, aclss in zip(res.results, clsss, aclsss): |
|---|
| 2688 | te.classes = [cls[i] for cls in clss] |
|---|
| 2689 | te.actual_class = aclss[i] |
|---|
| 2690 | single_scores.append(score[i](res)) |
|---|
| 2691 | # restore original classes |
|---|
| 2692 | for te, clss, aclss in zip(res.results, clsss, aclsss): |
|---|
| 2693 | te.classes = clss |
|---|
| 2694 | te.actual_class = aclss |
|---|
| 2695 | return [sum(w * s for w, s in zip(weights, scores)) / sum(weights) |
|---|
| 2696 | for scores in zip(*single_scores)] |
|---|
| 2697 | |
|---|
| 2698 | def mt_flattened_score(res, score): |
|---|
| 2699 | """ |
|---|
| 2700 | Flatten (concatenate into a single list) the predictions of multiple |
|---|
| 2701 | targets and compute a single-target score. |
|---|
| 2702 | |
|---|
| 2703 | :param score: Single-target scoring method. |
|---|
| 2704 | """ |
|---|
| 2705 | res2 = Orange.evaluation.testing.ExperimentResults(res.number_of_iterations, |
|---|
| 2706 | res.classifier_names, class_values=res.class_values, |
|---|
| 2707 | weights=res.weights, classifiers=res.classifiers, loaded=res.loaded, |
|---|
| 2708 | test_type=Orange.evaluation.testing.TEST_TYPE_SINGLE, labels=res.labels) |
|---|
| 2709 | for te in res.results: |
|---|
| 2710 | for i, ac in enumerate(te.actual_class): |
|---|
| 2711 | te2 = Orange.evaluation.testing.TestedExample( |
|---|
| 2712 | iteration_number=te.iteration_number, actual_class=ac) |
|---|
| 2713 | for c, p in zip(te.classes, te.probabilities): |
|---|
| 2714 | te2.add_result(c[i], p[i]) |
|---|
| 2715 | res2.results.append(te2) |
|---|
| 2716 | return score(res2) |
|---|
| 2717 | |
|---|
| 2718 | def mt_global_accuracy(res): |
|---|
| 2719 | """ |
|---|
| 2720 | :math:`Acc = \\frac{1}{N}\\sum_{i=1}^{N}\\delta(\\mathbf{c_{i}'},\\mathbf{c_{i}}) \\newline` |
|---|
| 2721 | |
|---|
| 2722 | :math:`\\delta (\\mathbf{c_{i}'},\\mathbf{c_{i}} )=\\left\\{\\begin{matrix}1:\\mathbf{c_{i}'}=\\mathbf{c_{i}}\\\\ 0: otherwise\\end{matrix}\\right.` |
|---|
| 2723 | """ |
|---|
| 2724 | results = [] |
|---|
| 2725 | for l in xrange(res.number_of_learners): |
|---|
| 2726 | n_results = len(res.results) |
|---|
| 2727 | n_correct = 0. |
|---|
| 2728 | |
|---|
| 2729 | for r in res.results: |
|---|
| 2730 | if list(r.classes[l]) == r.actual_class: |
|---|
| 2731 | n_correct+=1 |
|---|
| 2732 | |
|---|
| 2733 | results.append(n_correct/n_results) |
|---|
| 2734 | return results |
|---|
| 2735 | |
|---|
| 2736 | |
|---|
| 2737 | def mt_mean_accuracy(res): |
|---|
| 2738 | """ |
|---|
| 2739 | :math:`\\overline{Acc_{d}} = \\frac{1}{d}\\sum_{j=1}^{d}Acc_{j} = \\frac{1}{d}\\sum_{j=1}^{d} \\frac{1}{N}\\sum_{i=1}^{N}\\delta(c_{ij}',c_{ij} ) \\newline` |
|---|
| 2740 | |
|---|
| 2741 | :math:`\\delta (c_{ij}',c_{ij} )=\\left\\{\\begin{matrix}1:c_{ij}'=c_{ij}\\\\ 0: otherwise\\end{matrix}\\right.` |
|---|
| 2742 | """ |
|---|
| 2743 | results = [] |
|---|
| 2744 | for l in xrange(res.number_of_learners): |
|---|
| 2745 | n_classes = len(res.results[0].actual_class) |
|---|
| 2746 | n_results = len(res.results) |
|---|
| 2747 | n_correct = 0. |
|---|
| 2748 | |
|---|
| 2749 | for r in res.results: |
|---|
| 2750 | for i in xrange(n_classes): |
|---|
| 2751 | if r.classes[l][i] == r.actual_class[i]: |
|---|
| 2752 | n_correct+=1 |
|---|
| 2753 | results.append(n_correct/n_classes/n_results) |
|---|
| 2754 | return results |
|---|
| 2755 | |
|---|
| 2756 | |
|---|
| 2757 | |
|---|
| 2758 | ################################################################################ |
|---|
| 2759 | if __name__ == "__main__": |
|---|
| 2760 | avranks = [3.143, 2.000, 2.893, 1.964] |
|---|
| 2761 | names = ["prva", "druga", "tretja", "cetrta" ] |
|---|
| 2762 | cd = compute_CD(avranks, 14) |
|---|
| 2763 | #cd = compute_CD(avranks, 10, type="bonferroni-dunn") |
|---|
| 2764 | print cd |
|---|
| 2765 | |
|---|
| 2766 | print compute_friedman(avranks, 14) |
|---|
| 2767 | |
|---|
| 2768 | #graph_ranks("test.eps", avranks, names, cd=cd, cdmethod=0, width=6, textspace=1.5) |
|---|