| 1 | import math, functools |
|---|
| 2 | from operator import add |
|---|
| 3 | import numpy |
|---|
| 4 | |
|---|
| 5 | import Orange |
|---|
| 6 | from Orange import statc, corn |
|---|
| 7 | from Orange.misc import deprecated_keywords, deprecated_function_name, \ |
|---|
| 8 | deprecation_warning, environ |
|---|
| 9 | from Orange.evaluation import testing |
|---|
| 10 | |
|---|
| 11 | try: |
|---|
| 12 | import matplotlib |
|---|
| 13 | HAS_MATPLOTLIB = True |
|---|
| 14 | except ImportError: |
|---|
| 15 | matplotlib = None |
|---|
| 16 | HAS_MATPLOTLIB = False |
|---|
| 17 | |
|---|
| 18 | #### Private stuff |
|---|
| 19 | |
|---|
| 20 | def log2(x): |
|---|
| 21 | """Calculate logarithm in base 2.""" |
|---|
| 22 | return math.log(x)/math.log(2) |
|---|
| 23 | |
|---|
| 24 | def check_non_zero(x): |
|---|
| 25 | """Throw Value Error when x = 0.0.""" |
|---|
| 26 | if x==0.0: |
|---|
| 27 | raise ValueError, "Cannot compute the score: no examples or sum of weights is 0.0." |
|---|
| 28 | |
|---|
| 29 | def gettotweight(res): |
|---|
| 30 | """Sum all the weights""" |
|---|
| 31 | totweight = reduce(lambda x, y: x+y.weight, res.results, 0) |
|---|
| 32 | if totweight==0.0: |
|---|
| 33 | raise ValueError, "Cannot compute the score: sum of weights is 0.0." |
|---|
| 34 | return totweight |
|---|
| 35 | |
|---|
| 36 | def gettotsize(res): |
|---|
| 37 | """ Get number of result instances """ |
|---|
| 38 | if len(res.results): |
|---|
| 39 | return len(res.results) |
|---|
| 40 | else: |
|---|
| 41 | raise ValueError, "Cannot compute the score: no examples." |
|---|
| 42 | |
|---|
| 43 | |
|---|
| 44 | def split_by_iterations(res): |
|---|
| 45 | """ Splits ExperimentResults of multiple iteratation test into a list |
|---|
| 46 | of ExperimentResults, one for each iteration. |
|---|
| 47 | """ |
|---|
| 48 | if res.number_of_iterations < 2: |
|---|
| 49 | return [res] |
|---|
| 50 | |
|---|
| 51 | ress = [Orange.evaluation.testing.ExperimentResults( |
|---|
| 52 | 1, res.classifier_names, res.class_values, |
|---|
| 53 | res.weights, classifiers=res.classifiers, |
|---|
| 54 | loaded=res.loaded, test_type=res.test_type, labels=res.labels) |
|---|
| 55 | for _ in range(res.number_of_iterations)] |
|---|
| 56 | for te in res.results: |
|---|
| 57 | ress[te.iteration_number].results.append(te) |
|---|
| 58 | return ress |
|---|
| 59 | |
|---|
| 60 | def split_by_classifiers(res): |
|---|
| 61 | """ Splites an instance of :obj:`ExperimentResults` into a list of |
|---|
| 62 | :obj:`ExperimentResults`, one for each classifier. |
|---|
| 63 | """ |
|---|
| 64 | split_res = [] |
|---|
| 65 | for i in range(len(res.classifierNames)): |
|---|
| 66 | r = Orange.evaluation.testing.ExperimentResults(res.numberOfIterations, |
|---|
| 67 | [res.classifierNames[i]], res.classValues, |
|---|
| 68 | weights=res.weights, baseClass=res.baseClass, |
|---|
| 69 | classifiers=[res.classifiers[i]] if res.classifiers else [], |
|---|
| 70 | test_type = res.test_type, labels = res.labels) |
|---|
| 71 | r.results = [] |
|---|
| 72 | for te in res.results: |
|---|
| 73 | r.results.append(Orange.evaluation.testing.TestedExample(te.iterationNumber, |
|---|
| 74 | te.actualClass, n=1, weight=te.weight)) |
|---|
| 75 | r.results[-1].classes = [te.classes[i]] |
|---|
| 76 | r.results[-1].probabilities = [te.probabilities[i]] |
|---|
| 77 | split_res.append(r) |
|---|
| 78 | return split_res |
|---|
| 79 | |
|---|
| 80 | |
|---|
| 81 | def class_probabilities_from_res(res, **argkw): |
|---|
| 82 | """Calculate class probabilities""" |
|---|
| 83 | probs = [0.0] * len(res.class_values) |
|---|
| 84 | if argkw.get("unweighted", 0) or not res.weights: |
|---|
| 85 | for tex in res.results: |
|---|
| 86 | probs[int(tex.actual_class)] += 1.0 |
|---|
| 87 | totweight = gettotsize(res) |
|---|
| 88 | else: |
|---|
| 89 | totweight = 0.0 |
|---|
| 90 | for tex in res.results: |
|---|
| 91 | probs[tex.actual_class] += tex.weight |
|---|
| 92 | totweight += tex.weight |
|---|
| 93 | check_non_zero(totweight) |
|---|
| 94 | return [prob/totweight for prob in probs] |
|---|
| 95 | |
|---|
| 96 | |
|---|
| 97 | @deprecated_keywords({ |
|---|
| 98 | "foldN": "fold_n", |
|---|
| 99 | "reportSE": "report_se", |
|---|
| 100 | "iterationIsOuter": "iteration_is_outer"}) |
|---|
| 101 | def statistics_by_folds(stats, fold_n, report_se, iteration_is_outer): |
|---|
| 102 | # remove empty folds, turn the matrix so that learner is outer |
|---|
| 103 | if iteration_is_outer: |
|---|
| 104 | if not stats: |
|---|
| 105 | raise ValueError, "Cannot compute the score: no examples or sum of weights is 0.0." |
|---|
| 106 | number_of_learners = len(stats[0]) |
|---|
| 107 | stats = filter(lambda (x, fN): fN>0.0, zip(stats,fold_n)) |
|---|
| 108 | stats = [ [x[lrn]/fN for x, fN in stats] for lrn in range(number_of_learners)] |
|---|
| 109 | else: |
|---|
| 110 | stats = [ [x/Fn for x, Fn in filter(lambda (x, Fn): Fn > 0.0, zip(lrnD, fold_n))] for lrnD in stats] |
|---|
| 111 | |
|---|
| 112 | if not stats: |
|---|
| 113 | raise ValueError, "Cannot compute the score: no classifiers" |
|---|
| 114 | if not stats[0]: |
|---|
| 115 | raise ValueError, "Cannot compute the score: no examples or sum of weights is 0.0." |
|---|
| 116 | |
|---|
| 117 | if report_se: |
|---|
| 118 | return [(statc.mean(x), statc.sterr(x)) for x in stats] |
|---|
| 119 | else: |
|---|
| 120 | return [statc.mean(x) for x in stats] |
|---|
| 121 | |
|---|
| 122 | def ME(res, **argkw): |
|---|
| 123 | MEs = [0.0]*res.number_of_learners |
|---|
| 124 | |
|---|
| 125 | if argkw.get("unweighted", 0) or not res.weights: |
|---|
| 126 | for tex in res.results: |
|---|
| 127 | MEs = map(lambda res, cls, ac = float(tex.actual_class): |
|---|
| 128 | res + abs(float(cls) - ac), MEs, tex.classes) |
|---|
| 129 | totweight = gettotsize(res) |
|---|
| 130 | else: |
|---|
| 131 | for tex in res.results: |
|---|
| 132 | MEs = map(lambda res, cls, ac = float(tex.actual_class), tw = tex.weight: |
|---|
| 133 | res + tw*abs(float(cls) - ac), MEs, tex.classes) |
|---|
| 134 | totweight = gettotweight(res) |
|---|
| 135 | |
|---|
| 136 | return [x/totweight for x in MEs] |
|---|
| 137 | |
|---|
| 138 | MAE = ME |
|---|
| 139 | |
|---|
| 140 | |
|---|
| 141 | class ConfusionMatrix: |
|---|
| 142 | """ |
|---|
| 143 | Classification result summary |
|---|
| 144 | """ |
|---|
| 145 | #: True Positive predictions |
|---|
| 146 | TP = 0. |
|---|
| 147 | #:True Negative predictions |
|---|
| 148 | TN = 0. |
|---|
| 149 | #:False Positive predictions |
|---|
| 150 | FP = 0. |
|---|
| 151 | #: False Negative predictions |
|---|
| 152 | FN = 0. |
|---|
| 153 | |
|---|
| 154 | @deprecated_keywords({"predictedPositive": "predicted_positive", |
|---|
| 155 | "isPositive": "is_positive"}) |
|---|
| 156 | def addTFPosNeg(self, predicted_positive, is_positive, weight = 1.0): |
|---|
| 157 | """ |
|---|
| 158 | Update confusion matrix with result of a single classification |
|---|
| 159 | |
|---|
| 160 | :param predicted_positive: positive class value was predicted |
|---|
| 161 | :param is_positive: correct class value is positive |
|---|
| 162 | :param weight: weight of the selected instance |
|---|
| 163 | """ |
|---|
| 164 | if predicted_positive: |
|---|
| 165 | if is_positive: |
|---|
| 166 | self.TP += weight |
|---|
| 167 | else: |
|---|
| 168 | self.FP += weight |
|---|
| 169 | else: |
|---|
| 170 | if is_positive: |
|---|
| 171 | self.FN += weight |
|---|
| 172 | else: |
|---|
| 173 | self.TN += weight |
|---|
| 174 | |
|---|
| 175 | |
|---|
| 176 | |
|---|
| 177 | ######################################################################### |
|---|
| 178 | # PERFORMANCE MEASURES: |
|---|
| 179 | # Scores for evaluation of numeric predictions |
|---|
| 180 | |
|---|
| 181 | def check_argkw(dct, lst): |
|---|
| 182 | """check_argkw(dct, lst) -> returns true if any items have non-zero value in dct""" |
|---|
| 183 | return reduce(lambda x,y: x or y, [dct.get(k, 0) for k in lst]) |
|---|
| 184 | |
|---|
| 185 | def regression_error(res, **argkw): |
|---|
| 186 | """regression_error(res) -> regression error (default: MSE)""" |
|---|
| 187 | if argkw.get("SE", 0) and res.number_of_iterations > 1: |
|---|
| 188 | # computes the scores for each iteration, then averages |
|---|
| 189 | scores = [[0.0] * res.number_of_iterations for _ in range(res.number_of_learners)] |
|---|
| 190 | norm=None |
|---|
| 191 | if argkw.get("norm-abs", 0) or argkw.get("norm-sqr", 0): |
|---|
| 192 | norm = [0.0] * res.number_of_iterations |
|---|
| 193 | |
|---|
| 194 | nIter = [0]*res.number_of_iterations # counts examples in each iteration |
|---|
| 195 | a = [0]*res.number_of_iterations # average class in each iteration |
|---|
| 196 | for tex in res.results: |
|---|
| 197 | nIter[tex.iteration_number] += 1 |
|---|
| 198 | a[tex.iteration_number] += float(tex.actual_class) |
|---|
| 199 | a = [a[i]/nIter[i] for i in range(res.number_of_iterations)] |
|---|
| 200 | |
|---|
| 201 | if argkw.get("unweighted", 0) or not res.weights: |
|---|
| 202 | # iterate accross test cases |
|---|
| 203 | for tex in res.results: |
|---|
| 204 | ai = float(tex.actual_class) |
|---|
| 205 | nIter[tex.iteration_number] += 1 |
|---|
| 206 | |
|---|
| 207 | # compute normalization, if required |
|---|
| 208 | if argkw.get("norm-abs", 0): |
|---|
| 209 | norm[tex.iteration_number] += abs(ai - a[tex.iteration_number]) |
|---|
| 210 | elif argkw.get("norm-sqr", 0): |
|---|
| 211 | norm[tex.iteration_number] += (ai - a[tex.iteration_number])**2 |
|---|
| 212 | |
|---|
| 213 | # iterate accross results of different regressors |
|---|
| 214 | for i, cls in enumerate(tex.classes): |
|---|
| 215 | if argkw.get("abs", 0): |
|---|
| 216 | scores[i][tex.iteration_number] += abs(float(cls) - ai) |
|---|
| 217 | else: |
|---|
| 218 | scores[i][tex.iteration_number] += (float(cls) - ai)**2 |
|---|
| 219 | else: # unweighted<>0 |
|---|
| 220 | raise NotImplementedError, "weighted error scores with SE not implemented yet" |
|---|
| 221 | |
|---|
| 222 | if argkw.get("norm-abs") or argkw.get("norm-sqr"): |
|---|
| 223 | scores = [[x/n for x, n in zip(y, norm)] for y in scores] |
|---|
| 224 | else: |
|---|
| 225 | scores = [[x/ni for x, ni in zip(y, nIter)] for y in scores] |
|---|
| 226 | |
|---|
| 227 | if argkw.get("R2"): |
|---|
| 228 | scores = [[1.0 - x for x in y] for y in scores] |
|---|
| 229 | |
|---|
| 230 | if argkw.get("sqrt", 0): |
|---|
| 231 | scores = [[math.sqrt(x) for x in y] for y in scores] |
|---|
| 232 | |
|---|
| 233 | return [(statc.mean(x), statc.std(x)) for x in scores] |
|---|
| 234 | |
|---|
| 235 | else: # single iteration (testing on a single test set) |
|---|
| 236 | scores = [0.0] * res.number_of_learners |
|---|
| 237 | norm = 0.0 |
|---|
| 238 | |
|---|
| 239 | if argkw.get("unweighted", 0) or not res.weights: |
|---|
| 240 | a = sum([tex.actual_class for tex in res.results]) \ |
|---|
| 241 | / len(res.results) |
|---|
| 242 | for tex in res.results: |
|---|
| 243 | if argkw.get("abs", 0): |
|---|
| 244 | scores = map(lambda res, cls, ac = float(tex.actual_class): |
|---|
| 245 | res + abs(float(cls) - ac), scores, tex.classes) |
|---|
| 246 | else: |
|---|
| 247 | scores = map(lambda res, cls, ac = float(tex.actual_class): |
|---|
| 248 | res + (float(cls) - ac)**2, scores, tex.classes) |
|---|
| 249 | |
|---|
| 250 | if argkw.get("norm-abs", 0): |
|---|
| 251 | norm += abs(tex.actual_class - a) |
|---|
| 252 | elif argkw.get("norm-sqr", 0): |
|---|
| 253 | norm += (tex.actual_class - a)**2 |
|---|
| 254 | totweight = gettotsize(res) |
|---|
| 255 | else: |
|---|
| 256 | # UNFINISHED |
|---|
| 257 | MSEs = [0.]*res.number_of_learners |
|---|
| 258 | for tex in res.results: |
|---|
| 259 | MSEs = map(lambda res, cls, ac = float(tex.actual_class), |
|---|
| 260 | tw = tex.weight: |
|---|
| 261 | res + tw * (float(cls) - ac)**2, MSEs, tex.classes) |
|---|
| 262 | totweight = gettotweight(res) |
|---|
| 263 | |
|---|
| 264 | if argkw.get("norm-abs", 0) or argkw.get("norm-sqr", 0): |
|---|
| 265 | scores = [s/norm for s in scores] |
|---|
| 266 | else: # normalize by number of instances (or sum of weights) |
|---|
| 267 | scores = [s/totweight for s in scores] |
|---|
| 268 | |
|---|
| 269 | if argkw.get("R2"): |
|---|
| 270 | scores = [1.0 - s for s in scores] |
|---|
| 271 | |
|---|
| 272 | if argkw.get("sqrt", 0): |
|---|
| 273 | scores = [math.sqrt(x) for x in scores] |
|---|
| 274 | |
|---|
| 275 | return scores |
|---|
| 276 | |
|---|
| 277 | def MSE(res, **argkw): |
|---|
| 278 | """ Computes mean-squared error. """ |
|---|
| 279 | return regression_error(res, **argkw) |
|---|
| 280 | |
|---|
| 281 | def RMSE(res, **argkw): |
|---|
| 282 | """ Computes root mean-squared error. """ |
|---|
| 283 | argkw.setdefault("sqrt", True) |
|---|
| 284 | return regression_error(res, **argkw) |
|---|
| 285 | |
|---|
| 286 | def MAE(res, **argkw): |
|---|
| 287 | """ Computes mean absolute error. """ |
|---|
| 288 | argkw.setdefault("abs", True) |
|---|
| 289 | return regression_error(res, **argkw) |
|---|
| 290 | |
|---|
| 291 | def RSE(res, **argkw): |
|---|
| 292 | """ Computes relative squared error. """ |
|---|
| 293 | argkw.setdefault("norm-sqr", True) |
|---|
| 294 | return regression_error(res, **argkw) |
|---|
| 295 | |
|---|
| 296 | def RRSE(res, **argkw): |
|---|
| 297 | """ Computes relative squared error. """ |
|---|
| 298 | argkw.setdefault("norm-sqr", True) |
|---|
| 299 | argkw.setdefault("sqrt", True) |
|---|
| 300 | return regression_error(res, **argkw) |
|---|
| 301 | |
|---|
| 302 | def RAE(res, **argkw): |
|---|
| 303 | """ Computes relative absolute error. """ |
|---|
| 304 | argkw.setdefault("abs", True) |
|---|
| 305 | argkw.setdefault("norm-abs", True) |
|---|
| 306 | return regression_error(res, **argkw) |
|---|
| 307 | |
|---|
| 308 | def R2(res, **argkw): |
|---|
| 309 | """ Computes the coefficient of determination, R-squared. """ |
|---|
| 310 | argkw.setdefault("norm-sqr", True) |
|---|
| 311 | argkw.setdefault("R2", True) |
|---|
| 312 | return regression_error(res, **argkw) |
|---|
| 313 | |
|---|
| 314 | def MSE_old(res, **argkw): |
|---|
| 315 | """MSE(res) -> mean-squared error""" |
|---|
| 316 | if argkw.get("SE", 0) and res.number_of_iterations > 1: |
|---|
| 317 | MSEs = [[0.0] * res.number_of_iterations for _ in range(res.number_of_learners)] |
|---|
| 318 | nIter = [0]*res.number_of_iterations |
|---|
| 319 | if argkw.get("unweighted", 0) or not res.weights: |
|---|
| 320 | for tex in res.results: |
|---|
| 321 | ac = float(tex.actual_class) |
|---|
| 322 | nIter[tex.iteration_number] += 1 |
|---|
| 323 | for i, cls in enumerate(tex.classes): |
|---|
| 324 | MSEs[i][tex.iteration_number] += (float(cls) - ac)**2 |
|---|
| 325 | else: |
|---|
| 326 | raise ValueError, "weighted RMSE with SE not implemented yet" |
|---|
| 327 | MSEs = [[x/ni for x, ni in zip(y, nIter)] for y in MSEs] |
|---|
| 328 | if argkw.get("sqrt", 0): |
|---|
| 329 | MSEs = [[math.sqrt(x) for x in y] for y in MSEs] |
|---|
| 330 | return [(statc.mean(x), statc.std(x)) for x in MSEs] |
|---|
| 331 | |
|---|
| 332 | else: |
|---|
| 333 | MSEs = [0.0]*res.number_of_learners |
|---|
| 334 | if argkw.get("unweighted", 0) or not res.weights: |
|---|
| 335 | for tex in res.results: |
|---|
| 336 | MSEs = map(lambda res, cls, ac = float(tex.actual_class): |
|---|
| 337 | res + (float(cls) - ac)**2, MSEs, tex.classes) |
|---|
| 338 | totweight = gettotsize(res) |
|---|
| 339 | else: |
|---|
| 340 | for tex in res.results: |
|---|
| 341 | MSEs = map(lambda res, cls, ac = float(tex.actual_class), tw = tex.weight: |
|---|
| 342 | res + tw * (float(cls) - ac)**2, MSEs, tex.classes) |
|---|
| 343 | totweight = gettotweight(res) |
|---|
| 344 | |
|---|
| 345 | if argkw.get("sqrt", 0): |
|---|
| 346 | MSEs = [math.sqrt(x) for x in MSEs] |
|---|
| 347 | return [x/totweight for x in MSEs] |
|---|
| 348 | |
|---|
| 349 | def RMSE_old(res, **argkw): |
|---|
| 350 | """RMSE(res) -> root mean-squared error""" |
|---|
| 351 | argkw.setdefault("sqrt", 1) |
|---|
| 352 | return MSE_old(res, **argkw) |
|---|
| 353 | |
|---|
| 354 | ######################################################################### |
|---|
| 355 | # PERFORMANCE MEASURES: |
|---|
| 356 | # Scores for evaluation of classifiers |
|---|
| 357 | |
|---|
| 358 | class CA(list): |
|---|
| 359 | """ |
|---|
| 360 | Compute percentage of matches between predicted and actual class. |
|---|
| 361 | |
|---|
| 362 | :param test_results: :obj:`~Orange.evaluation.testing.ExperimentResults` |
|---|
| 363 | or list of :obj:`ConfusionMatrix`. |
|---|
| 364 | :param report_se: include standard error in result. |
|---|
| 365 | :param ignore_weights: ignore instance weights. |
|---|
| 366 | :rtype: list of scores, one for each learner. |
|---|
| 367 | |
|---|
| 368 | Standard errors are estimated from deviation of CAs across folds (if |
|---|
| 369 | test_results were produced by cross_validation) or approximated under |
|---|
| 370 | the assumption of normal distribution otherwise. |
|---|
| 371 | """ |
|---|
| 372 | CONFUSION_MATRIX = 0 |
|---|
| 373 | CONFUSION_MATRIX_LIST = 1 |
|---|
| 374 | CLASSIFICATION = 2 |
|---|
| 375 | CROSS_VALIDATION = 3 |
|---|
| 376 | |
|---|
| 377 | @deprecated_keywords({"reportSE": "report_se", |
|---|
| 378 | "unweighted": "ignore_weights"}) |
|---|
| 379 | def __init__(self, test_results, report_se = False, ignore_weights=False): |
|---|
| 380 | super(CA, self).__init__() |
|---|
| 381 | self.report_se = report_se |
|---|
| 382 | self.ignore_weights = ignore_weights |
|---|
| 383 | |
|---|
| 384 | input_type = self.get_input_type(test_results) |
|---|
| 385 | if input_type == self.CONFUSION_MATRIX: |
|---|
| 386 | self[:] = [self.from_confusion_matrix(test_results)] |
|---|
| 387 | elif input_type == self.CONFUSION_MATRIX_LIST: |
|---|
| 388 | self[:] = self.from_confusion_matrix_list(test_results) |
|---|
| 389 | elif input_type == self.CLASSIFICATION: |
|---|
| 390 | self[:] = self.from_classification_results(test_results) |
|---|
| 391 | elif input_type == self.CROSS_VALIDATION: |
|---|
| 392 | self[:] = self.from_crossvalidation_results(test_results) |
|---|
| 393 | |
|---|
| 394 | def from_confusion_matrix(self, cm): |
|---|
| 395 | all_predictions = 0. |
|---|
| 396 | correct_predictions = 0. |
|---|
| 397 | if isinstance(cm, ConfusionMatrix): |
|---|
| 398 | all_predictions += cm.TP+cm.FN+cm.FP+cm.TN |
|---|
| 399 | correct_predictions += cm.TP+cm.TN |
|---|
| 400 | else: |
|---|
| 401 | for r, row in enumerate(cm): |
|---|
| 402 | for c, column in enumerate(row): |
|---|
| 403 | if r == c: |
|---|
| 404 | correct_predictions += column |
|---|
| 405 | all_predictions += column |
|---|
| 406 | |
|---|
| 407 | check_non_zero(all_predictions) |
|---|
| 408 | ca = correct_predictions/all_predictions |
|---|
| 409 | |
|---|
| 410 | if self.report_se: |
|---|
| 411 | return ca, ca*(1-ca)/math.sqrt(all_predictions) |
|---|
| 412 | else: |
|---|
| 413 | return ca |
|---|
| 414 | |
|---|
| 415 | def from_confusion_matrix_list(self, confusion_matrices): |
|---|
| 416 | return [self.from_confusion_matrix(cm) for cm in confusion_matrices] |
|---|
| 417 | |
|---|
| 418 | def from_classification_results(self, test_results): |
|---|
| 419 | CAs = [0.0]*test_results.number_of_learners |
|---|
| 420 | totweight = 0. |
|---|
| 421 | for tex in test_results.results: |
|---|
| 422 | w = 1. if self.ignore_weights else tex.weight |
|---|
| 423 | CAs = map(lambda res, cls: res+(cls==tex.actual_class and w), CAs, tex.classes) |
|---|
| 424 | totweight += w |
|---|
| 425 | check_non_zero(totweight) |
|---|
| 426 | ca = [x/totweight for x in CAs] |
|---|
| 427 | |
|---|
| 428 | if self.report_se: |
|---|
| 429 | return [(x, x*(1-x)/math.sqrt(totweight)) for x in ca] |
|---|
| 430 | else: |
|---|
| 431 | return ca |
|---|
| 432 | |
|---|
| 433 | def from_crossvalidation_results(self, test_results): |
|---|
| 434 | CAsByFold = [[0.0]*test_results.number_of_iterations for _ in range(test_results.number_of_learners)] |
|---|
| 435 | foldN = [0.0]*test_results.number_of_iterations |
|---|
| 436 | |
|---|
| 437 | for tex in test_results.results: |
|---|
| 438 | w = 1. if self.ignore_weights else tex.weight |
|---|
| 439 | for lrn in range(test_results.number_of_learners): |
|---|
| 440 | CAsByFold[lrn][tex.iteration_number] += (tex.classes[lrn]==tex.actual_class) and w |
|---|
| 441 | foldN[tex.iteration_number] += w |
|---|
| 442 | |
|---|
| 443 | return statistics_by_folds(CAsByFold, foldN, self.report_se, False) |
|---|
| 444 | |
|---|
| 445 | def get_input_type(self, test_results): |
|---|
| 446 | if isinstance(test_results, ConfusionMatrix): |
|---|
| 447 | return self.CONFUSION_MATRIX |
|---|
| 448 | elif isinstance(test_results, testing.ExperimentResults): |
|---|
| 449 | if test_results.number_of_iterations == 1: |
|---|
| 450 | return self.CLASSIFICATION |
|---|
| 451 | else: |
|---|
| 452 | return self.CROSS_VALIDATION |
|---|
| 453 | elif isinstance(test_results, list): |
|---|
| 454 | return self.CONFUSION_MATRIX_LIST |
|---|
| 455 | |
|---|
| 456 | |
|---|
| 457 | @deprecated_keywords({"reportSE": "report_se", |
|---|
| 458 | "unweighted": "ignore_weights"}) |
|---|
| 459 | def AP(res, report_se = False, ignore_weights=False, **argkw): |
|---|
| 460 | """ Computes the average probability assigned to the correct class. """ |
|---|
| 461 | if res.number_of_iterations == 1: |
|---|
| 462 | APs=[0.0]*res.number_of_learners |
|---|
| 463 | if ignore_weights or not res.weights: |
|---|
| 464 | for tex in res.results: |
|---|
| 465 | APs = map(lambda res, probs: res + probs[tex.actual_class], APs, tex.probabilities) |
|---|
| 466 | totweight = gettotsize(res) |
|---|
| 467 | else: |
|---|
| 468 | totweight = 0. |
|---|
| 469 | for tex in res.results: |
|---|
| 470 | APs = map(lambda res, probs: res + probs[tex.actual_class]*tex.weight, APs, tex.probabilities) |
|---|
| 471 | totweight += tex.weight |
|---|
| 472 | check_non_zero(totweight) |
|---|
| 473 | return [AP/totweight for AP in APs] |
|---|
| 474 | |
|---|
| 475 | APsByFold = [[0.0]*res.number_of_learners for _ in range(res.number_of_iterations)] |
|---|
| 476 | foldN = [0.0] * res.number_of_iterations |
|---|
| 477 | if ignore_weights or not res.weights: |
|---|
| 478 | for tex in res.results: |
|---|
| 479 | APsByFold[tex.iteration_number] = map(lambda res, probs: res + probs[tex.actual_class], APsByFold[tex.iteration_number], tex.probabilities) |
|---|
| 480 | foldN[tex.iteration_number] += 1 |
|---|
| 481 | else: |
|---|
| 482 | for tex in res.results: |
|---|
| 483 | APsByFold[tex.iteration_number] = map(lambda res, probs: res + probs[tex.actual_class] * tex.weight, APsByFold[tex.iteration_number], tex.probabilities) |
|---|
| 484 | foldN[tex.iteration_number] += tex.weight |
|---|
| 485 | |
|---|
| 486 | return statistics_by_folds(APsByFold, foldN, report_se, True) |
|---|
| 487 | |
|---|
| 488 | |
|---|
| 489 | @deprecated_keywords({"reportSE": "report_se", |
|---|
| 490 | "unweighted": "ignore_weights"}) |
|---|
| 491 | def Brier_score(res, report_se = False, ignore_weights=False, **argkw): |
|---|
| 492 | """ Computes the Brier's score, defined as the average (over test examples) |
|---|
| 493 | of sumx(t(x)-p(x))2, where x is a class, t(x) is 1 for the correct class |
|---|
| 494 | and 0 for the others, and p(x) is the probability that the classifier |
|---|
| 495 | assigned to the class x |
|---|
| 496 | """ |
|---|
| 497 | # Computes an average (over examples) of sum_x(t(x) - p(x))^2, where |
|---|
| 498 | # x is class, |
|---|
| 499 | # t(x) is 0 for 'wrong' and 1 for 'correct' class |
|---|
| 500 | # p(x) is predicted probabilty. |
|---|
| 501 | # There's a trick: since t(x) is zero for all classes but the |
|---|
| 502 | # correct one (c), we compute the sum as sum_x(p(x)^2) - 2*p(c) + 1 |
|---|
| 503 | # Since +1 is there for each example, it adds 1 to the average |
|---|
| 504 | # We skip the +1 inside the sum and add it just at the end of the function |
|---|
| 505 | # We take max(result, 0) to avoid -0.0000x due to rounding errors |
|---|
| 506 | |
|---|
| 507 | if res.number_of_iterations == 1: |
|---|
| 508 | MSEs=[0.0]*res.number_of_learners |
|---|
| 509 | if ignore_weights or not res.weights: |
|---|
| 510 | totweight = 0.0 |
|---|
| 511 | for tex in res.results: |
|---|
| 512 | MSEs = map(lambda res, probs: |
|---|
| 513 | res + reduce(lambda s, pi: s+pi**2, probs, 0) - 2*probs[tex.actual_class], MSEs, tex.probabilities) |
|---|
| 514 | totweight += tex.weight |
|---|
| 515 | else: |
|---|
| 516 | for tex in res.results: |
|---|
| 517 | MSEs = map(lambda res, probs: |
|---|
| 518 | res + tex.weight*reduce(lambda s, pi: s+pi**2, probs, 0) - 2*probs[tex.actual_class], MSEs, tex.probabilities) |
|---|
| 519 | totweight = gettotweight(res) |
|---|
| 520 | check_non_zero(totweight) |
|---|
| 521 | if report_se: |
|---|
| 522 | return [(max(x/totweight+1.0, 0), 0) for x in MSEs] ## change this, not zero!!! |
|---|
| 523 | else: |
|---|
| 524 | return [max(x/totweight+1.0, 0) for x in MSEs] |
|---|
| 525 | |
|---|
| 526 | BSs = [[0.0]*res.number_of_learners for _ in range(res.number_of_iterations)] |
|---|
| 527 | foldN = [0.] * res.number_of_iterations |
|---|
| 528 | |
|---|
| 529 | if ignore_weights or not res.weights: |
|---|
| 530 | for tex in res.results: |
|---|
| 531 | BSs[tex.iteration_number] = map(lambda rr, probs: |
|---|
| 532 | rr + reduce(lambda s, pi: s+pi**2, probs, 0) - 2*probs[tex.actual_class], BSs[tex.iteration_number], tex.probabilities) |
|---|
| 533 | foldN[tex.iteration_number] += 1 |
|---|
| 534 | else: |
|---|
| 535 | for tex in res.results: |
|---|
| 536 | BSs[tex.iteration_number] = map(lambda res, probs: |
|---|
| 537 | res + tex.weight*reduce(lambda s, pi: s+pi**2, probs, 0) - 2*probs[tex.actual_class], BSs[tex.iteration_number], tex.probabilities) |
|---|
| 538 | foldN[tex.iteration_number] += tex.weight |
|---|
| 539 | |
|---|
| 540 | stats = statistics_by_folds(BSs, foldN, report_se, True) |
|---|
| 541 | if report_se: |
|---|
| 542 | return [(x+1.0, y) for x, y in stats] |
|---|
| 543 | else: |
|---|
| 544 | return [x+1.0 for x in stats] |
|---|
| 545 | |
|---|
| 546 | def BSS(res, **argkw): |
|---|
| 547 | return [1-x/2 for x in apply(Brier_score, (res, ), argkw)] |
|---|
| 548 | |
|---|
| 549 | def IS_ex(Pc, P): |
|---|
| 550 | """Pc aposterior probability, P aprior""" |
|---|
| 551 | if Pc>=P: |
|---|
| 552 | return -log2(P)+log2(Pc) |
|---|
| 553 | else: |
|---|
| 554 | return -(-log2(1-P)+log2(1-Pc)) |
|---|
| 555 | |
|---|
| 556 | |
|---|
| 557 | @deprecated_keywords({"reportSE": "report_se"}) |
|---|
| 558 | def IS(res, apriori=None, report_se = False, **argkw): |
|---|
| 559 | """ Computes the information score as defined by |
|---|
| 560 | `Kononenko and Bratko (1991) \ |
|---|
| 561 | <http://www.springerlink.com/content/g5p7473160476612/>`_. |
|---|
| 562 | Argument :obj:`apriori` gives the apriori class |
|---|
| 563 | distribution; if it is omitted, the class distribution is computed from |
|---|
| 564 | the actual classes of examples in :obj:`res`. |
|---|
| 565 | """ |
|---|
| 566 | if not apriori: |
|---|
| 567 | apriori = class_probabilities_from_res(res) |
|---|
| 568 | |
|---|
| 569 | if res.number_of_iterations==1: |
|---|
| 570 | ISs = [0.0]*res.number_of_learners |
|---|
| 571 | if argkw.get("unweighted", 0) or not res.weights: |
|---|
| 572 | for tex in res.results: |
|---|
| 573 | for i in range(len(tex.probabilities)): |
|---|
| 574 | cls = tex.actual_class |
|---|
| 575 | ISs[i] += IS_ex(tex.probabilities[i][cls], apriori[cls]) |
|---|
| 576 | totweight = gettotsize(res) |
|---|
| 577 | else: |
|---|
| 578 | for tex in res.results: |
|---|
| 579 | for i in range(len(tex.probabilities)): |
|---|
| 580 | cls = tex.actual_class |
|---|
| 581 | ISs[i] += IS_ex(tex.probabilities[i][cls], apriori[cls]) * tex.weight |
|---|
| 582 | totweight = gettotweight(res) |
|---|
| 583 | if report_se: |
|---|
| 584 | return [(IS/totweight,0) for IS in ISs] |
|---|
| 585 | else: |
|---|
| 586 | return [IS/totweight for IS in ISs] |
|---|
| 587 | |
|---|
| 588 | |
|---|
| 589 | ISs = [[0.0]*res.number_of_iterations for _ in range(res.number_of_learners)] |
|---|
| 590 | foldN = [0.] * res.number_of_iterations |
|---|
| 591 | |
|---|
| 592 | # compute info scores for each fold |
|---|
| 593 | if argkw.get("unweighted", 0) or not res.weights: |
|---|
| 594 | for tex in res.results: |
|---|
| 595 | for i in range(len(tex.probabilities)): |
|---|
| 596 | cls = tex.actual_class |
|---|
| 597 | ISs[i][tex.iteration_number] += IS_ex(tex.probabilities[i][cls], apriori[cls]) |
|---|
| 598 | foldN[tex.iteration_number] += 1 |
|---|
| 599 | else: |
|---|
| 600 | for tex in res.results: |
|---|
| 601 | for i in range(len(tex.probabilities)): |
|---|
| 602 | cls = tex.actual_class |
|---|
| 603 | ISs[i][tex.iteration_number] += IS_ex(tex.probabilities[i][cls], apriori[cls]) * tex.weight |
|---|
| 604 | foldN[tex.iteration_number] += tex.weight |
|---|
| 605 | |
|---|
| 606 | return statistics_by_folds(ISs, foldN, report_se, False) |
|---|
| 607 | |
|---|
| 608 | |
|---|
| 609 | def Wilcoxon(res, statistics, **argkw): |
|---|
| 610 | res1, res2 = [], [] |
|---|
| 611 | for ri in split_by_iterations(res): |
|---|
| 612 | stats = statistics(ri, **argkw) |
|---|
| 613 | if len(stats) != 2: |
|---|
| 614 | raise TypeError, "Wilcoxon compares two classifiers, no more, no less" |
|---|
| 615 | res1.append(stats[0]) |
|---|
| 616 | res2.append(stats[1]) |
|---|
| 617 | return statc.wilcoxont(res1, res2) |
|---|
| 618 | |
|---|
| 619 | def rank_difference(res, statistics, **argkw): |
|---|
| 620 | if not res.results: |
|---|
| 621 | raise TypeError, "no experiments" |
|---|
| 622 | |
|---|
| 623 | k = len(res.results[0].classes) |
|---|
| 624 | if k<2: |
|---|
| 625 | raise TypeError, "nothing to compare (less than two classifiers given)" |
|---|
| 626 | if k==2: |
|---|
| 627 | return apply(Wilcoxon, (res, statistics), argkw) |
|---|
| 628 | else: |
|---|
| 629 | return apply(Friedman, (res, statistics), argkw) |
|---|
| 630 | |
|---|
| 631 | |
|---|
| 632 | @deprecated_keywords({"res": "test_results", |
|---|
| 633 | "classIndex": "class_index", |
|---|
| 634 | "unweighted": "ignore_weights"}) |
|---|
| 635 | def confusion_matrices(test_results, class_index=-1, |
|---|
| 636 | ignore_weights=False, cutoff=.5): |
|---|
| 637 | """ |
|---|
| 638 | Return confusion matrices for test_results. |
|---|
| 639 | |
|---|
| 640 | :param test_results: test results |
|---|
| 641 | :param class_index: index of class value for which the confusion matrices |
|---|
| 642 | are to be computed. |
|---|
| 643 | :param ignore_weights: ignore instance weights. |
|---|
| 644 | :params cutoff: cutoff for probability |
|---|
| 645 | |
|---|
| 646 | :rtype: list of :obj:`ConfusionMatrix` |
|---|
| 647 | """ |
|---|
| 648 | tfpns = [ConfusionMatrix() for _ in range(test_results.number_of_learners)] |
|---|
| 649 | |
|---|
| 650 | if class_index<0: |
|---|
| 651 | numberOfClasses = len(test_results.class_values) |
|---|
| 652 | if class_index < -1 or numberOfClasses > 2: |
|---|
| 653 | cm = [[[0.0] * numberOfClasses for _ in range(numberOfClasses)] for _ in range(test_results.number_of_learners)] |
|---|
| 654 | if ignore_weights or not test_results.weights: |
|---|
| 655 | for tex in test_results.results: |
|---|
| 656 | trueClass = int(tex.actual_class) |
|---|
| 657 | for li, pred in enumerate(tex.classes): |
|---|
| 658 | predClass = int(pred) |
|---|
| 659 | if predClass < numberOfClasses: |
|---|
| 660 | cm[li][trueClass][predClass] += 1 |
|---|
| 661 | else: |
|---|
| 662 | for tex in test_results.results: |
|---|
| 663 | trueClass = int(tex.actual_class) |
|---|
| 664 | for li, pred in tex.classes: |
|---|
| 665 | predClass = int(pred) |
|---|
| 666 | if predClass < numberOfClasses: |
|---|
| 667 | cm[li][trueClass][predClass] += tex.weight |
|---|
| 668 | return cm |
|---|
| 669 | |
|---|
| 670 | elif test_results.baseClass>=0: |
|---|
| 671 | class_index = test_results.baseClass |
|---|
| 672 | else: |
|---|
| 673 | class_index = 1 |
|---|
| 674 | |
|---|
| 675 | if cutoff != .5: |
|---|
| 676 | if ignore_weights or not test_results.weights: |
|---|
| 677 | for lr in test_results.results: |
|---|
| 678 | isPositive=(lr.actual_class==class_index) |
|---|
| 679 | for i in range(test_results.number_of_learners): |
|---|
| 680 | tfpns[i].addTFPosNeg(lr.probabilities[i][class_index]>cutoff, isPositive) |
|---|
| 681 | else: |
|---|
| 682 | for lr in test_results.results: |
|---|
| 683 | isPositive=(lr.actual_class==class_index) |
|---|
| 684 | for i in range(test_results.number_of_learners): |
|---|
| 685 | tfpns[i].addTFPosNeg(lr.probabilities[i][class_index]>cutoff, isPositive, lr.weight) |
|---|
| 686 | else: |
|---|
| 687 | if ignore_weights or not test_results.weights: |
|---|
| 688 | for lr in test_results.results: |
|---|
| 689 | isPositive=(lr.actual_class==class_index) |
|---|
| 690 | for i in range(test_results.number_of_learners): |
|---|
| 691 | tfpns[i].addTFPosNeg(lr.classes[i]==class_index, isPositive) |
|---|
| 692 | else: |
|---|
| 693 | for lr in test_results.results: |
|---|
| 694 | isPositive=(lr.actual_class==class_index) |
|---|
| 695 | for i in range(test_results.number_of_learners): |
|---|
| 696 | tfpns[i].addTFPosNeg(lr.classes[i]==class_index, isPositive, lr.weight) |
|---|
| 697 | return tfpns |
|---|
| 698 | |
|---|
| 699 | |
|---|
| 700 | # obsolete (renamed) |
|---|
| 701 | compute_confusion_matrices = confusion_matrices |
|---|
| 702 | |
|---|
| 703 | |
|---|
| 704 | @deprecated_keywords({"confusionMatrix": "confusion_matrix"}) |
|---|
| 705 | def confusion_chi_square(confusion_matrix): |
|---|
| 706 | """ |
|---|
| 707 | Return chi square statistic of the confusion matrix |
|---|
| 708 | (higher value indicates that prediction is not by chance). |
|---|
| 709 | """ |
|---|
| 710 | if isinstance(confusion_matrix, ConfusionMatrix) or \ |
|---|
| 711 | not isinstance(confusion_matrix[1], list): |
|---|
| 712 | return _confusion_chi_square(confusion_matrix) |
|---|
| 713 | else: |
|---|
| 714 | return map(_confusion_chi_square, confusion_matrix) |
|---|
| 715 | |
|---|
| 716 | def _confusion_chi_square(confusion_matrix): |
|---|
| 717 | if isinstance(confusion_matrix, ConfusionMatrix): |
|---|
| 718 | c = confusion_matrix |
|---|
| 719 | confusion_matrix = [[c.TP, c.FN], [c.FP, c.TN]] |
|---|
| 720 | dim = len(confusion_matrix) |
|---|
| 721 | rowPriors = [sum(r) for r in confusion_matrix] |
|---|
| 722 | colPriors = [sum(r[i] for r in confusion_matrix) for i in range(dim)] |
|---|
| 723 | total = sum(rowPriors) |
|---|
| 724 | rowPriors = [r/total for r in rowPriors] |
|---|
| 725 | colPriors = [r/total for r in colPriors] |
|---|
| 726 | ss = 0 |
|---|
| 727 | for ri, row in enumerate(confusion_matrix): |
|---|
| 728 | for ci, o in enumerate(row): |
|---|
| 729 | e = total * rowPriors[ri] * colPriors[ci] |
|---|
| 730 | if not e: |
|---|
| 731 | return -1, -1, -1 |
|---|
| 732 | ss += (o-e)**2 / e |
|---|
| 733 | df = (dim-1)**2 |
|---|
| 734 | return ss, df, statc.chisqprob(ss, df) |
|---|
| 735 | |
|---|
| 736 | @deprecated_keywords({"confm": "confusion_matrix"}) |
|---|
| 737 | def sens(confusion_matrix): |
|---|
| 738 | """ |
|---|
| 739 | Return `sensitivity <http://en.wikipedia.org/wiki/Sensitivity_and_specificity>`_ |
|---|
| 740 | (proportion of actual positives which are correctly identified as such). |
|---|
| 741 | """ |
|---|
| 742 | if type(confusion_matrix) == list: |
|---|
| 743 | return [sens(cm) for cm in confusion_matrix] |
|---|
| 744 | else: |
|---|
| 745 | tot = confusion_matrix.TP+confusion_matrix.FN |
|---|
| 746 | if tot < 1e-6: |
|---|
| 747 | import warnings |
|---|
| 748 | warnings.warn("Can't compute sensitivity: one or both classes have no instances") |
|---|
| 749 | return -1 |
|---|
| 750 | |
|---|
| 751 | return confusion_matrix.TP/tot |
|---|
| 752 | |
|---|
| 753 | |
|---|
| 754 | @deprecated_keywords({"confm": "confusion_matrix"}) |
|---|
| 755 | def recall(confusion_matrix): |
|---|
| 756 | """ |
|---|
| 757 | Return `recall <http://en.wikipedia.org/wiki/Precision_and_recall>`_ |
|---|
| 758 | (fraction of relevant instances that are retrieved). |
|---|
| 759 | """ |
|---|
| 760 | return sens(confusion_matrix) |
|---|
| 761 | |
|---|
| 762 | |
|---|
| 763 | @deprecated_keywords({"confm": "confusion_matrix"}) |
|---|
| 764 | def spec(confusion_matrix): |
|---|
| 765 | """ |
|---|
| 766 | Return `specificity <http://en.wikipedia.org/wiki/Sensitivity_and_specificity>`_ |
|---|
| 767 | (proportion of negatives which are correctly identified). |
|---|
| 768 | """ |
|---|
| 769 | if type(confusion_matrix) == list: |
|---|
| 770 | return [spec(cm) for cm in confusion_matrix] |
|---|
| 771 | else: |
|---|
| 772 | tot = confusion_matrix.FP+confusion_matrix.TN |
|---|
| 773 | if tot < 1e-6: |
|---|
| 774 | import warnings |
|---|
| 775 | warnings.warn("Can't compute specificity: one or both classes have no instances") |
|---|
| 776 | return -1 |
|---|
| 777 | return confusion_matrix.TN/tot |
|---|
| 778 | |
|---|
| 779 | |
|---|
| 780 | @deprecated_keywords({"confm": "confusion_matrix"}) |
|---|
| 781 | def PPV(confusion_matrix): |
|---|
| 782 | """ |
|---|
| 783 | Return `positive predictive value <http://en.wikipedia.org/wiki/Positive_predictive_value>`_ |
|---|
| 784 | (proportion of subjects with positive test results who are correctly diagnosed).""" |
|---|
| 785 | if type(confusion_matrix) == list: |
|---|
| 786 | return [PPV(cm) for cm in confusion_matrix] |
|---|
| 787 | else: |
|---|
| 788 | tot = confusion_matrix.TP+confusion_matrix.FP |
|---|
| 789 | if tot < 1e-6: |
|---|
| 790 | import warnings |
|---|
| 791 | warnings.warn("Can't compute PPV: one or both classes have no instances") |
|---|
| 792 | return -1 |
|---|
| 793 | return confusion_matrix.TP/tot |
|---|
| 794 | |
|---|
| 795 | |
|---|
| 796 | @deprecated_keywords({"confm": "confusion_matrix"}) |
|---|
| 797 | def precision(confusion_matrix): |
|---|
| 798 | """ |
|---|
| 799 | Return `precision <http://en.wikipedia.org/wiki/Precision_and_recall>`_ |
|---|
| 800 | (retrieved instances that are relevant). |
|---|
| 801 | """ |
|---|
| 802 | return PPV(confusion_matrix) |
|---|
| 803 | |
|---|
| 804 | @deprecated_keywords({"confm": "confusion_matrix"}) |
|---|
| 805 | def NPV(confusion_matrix): |
|---|
| 806 | """Return `negative predictive value <http://en.wikipedia.org/wiki/Negative_predictive_value>`_ |
|---|
| 807 | (proportion of subjects with a negative test result who are correctly |
|---|
| 808 | diagnosed). |
|---|
| 809 | """ |
|---|
| 810 | if type(confusion_matrix) == list: |
|---|
| 811 | return [NPV(cm) for cm in confusion_matrix] |
|---|
| 812 | else: |
|---|
| 813 | tot = confusion_matrix.FN+confusion_matrix.TN |
|---|
| 814 | if tot < 1e-6: |
|---|
| 815 | import warnings |
|---|
| 816 | warnings.warn("Can't compute NPV: one or both classes have no instances") |
|---|
| 817 | return -1 |
|---|
| 818 | return confusion_matrix.TN/tot |
|---|
| 819 | |
|---|
| 820 | @deprecated_keywords({"confm": "confusion_matrix"}) |
|---|
| 821 | def F1(confusion_matrix): |
|---|
| 822 | """Return `F1 score <http://en.wikipedia.org/wiki/F1_score>`_ |
|---|
| 823 | (harmonic mean of precision and recall).""" |
|---|
| 824 | if type(confusion_matrix) == list: |
|---|
| 825 | return [F1(cm) for cm in confusion_matrix] |
|---|
| 826 | else: |
|---|
| 827 | p = precision(confusion_matrix) |
|---|
| 828 | r = recall(confusion_matrix) |
|---|
| 829 | if p + r > 0: |
|---|
| 830 | return 2. * p * r / (p + r) |
|---|
| 831 | else: |
|---|
| 832 | import warnings |
|---|
| 833 | warnings.warn("Can't compute F1: P + R is zero or not defined") |
|---|
| 834 | return -1 |
|---|
| 835 | |
|---|
| 836 | |
|---|
| 837 | @deprecated_keywords({"confm": "confusion_matrix"}) |
|---|
| 838 | def Falpha(confusion_matrix, alpha=1.0): |
|---|
| 839 | """Return the alpha-mean of precision and recall over the given confusion matrix.""" |
|---|
| 840 | if type(confusion_matrix) == list: |
|---|
| 841 | return [Falpha(cm, alpha=alpha) for cm in confusion_matrix] |
|---|
| 842 | else: |
|---|
| 843 | p = precision(confusion_matrix) |
|---|
| 844 | r = recall(confusion_matrix) |
|---|
| 845 | return (1. + alpha) * p * r / (alpha * p + r) |
|---|
| 846 | |
|---|
| 847 | |
|---|
| 848 | @deprecated_keywords({"confm": "confusion_matrix"}) |
|---|
| 849 | def MCC(confusion_matrix): |
|---|
| 850 | """ |
|---|
| 851 | Return `Matthew correlation coefficient <http://en.wikipedia.org/wiki/Matthews_correlation_coefficient>`_ |
|---|
| 852 | (correlation coefficient between the observed and predicted binary classifications) |
|---|
| 853 | """ |
|---|
| 854 | # code by Boris Gorelik |
|---|
| 855 | if type(confusion_matrix) == list: |
|---|
| 856 | return [MCC(cm) for cm in confusion_matrix] |
|---|
| 857 | else: |
|---|
| 858 | truePositive = confusion_matrix.TP |
|---|
| 859 | trueNegative = confusion_matrix.TN |
|---|
| 860 | falsePositive = confusion_matrix.FP |
|---|
| 861 | falseNegative = confusion_matrix.FN |
|---|
| 862 | |
|---|
| 863 | try: |
|---|
| 864 | r = (((truePositive * trueNegative) - (falsePositive * falseNegative))/ |
|---|
| 865 | math.sqrt( (truePositive + falsePositive) * |
|---|
| 866 | ( truePositive + falseNegative ) * |
|---|
| 867 | ( trueNegative + falsePositive ) * |
|---|
| 868 | ( trueNegative + falseNegative ) ) |
|---|
| 869 | ) |
|---|
| 870 | except ZeroDivisionError: |
|---|
| 871 | # Zero difision occurs when there is either no true positives |
|---|
| 872 | # or no true negatives i.e. the problem contains only one |
|---|
| 873 | # type of classes. |
|---|
| 874 | import warnings |
|---|
| 875 | warnings.warn("Can't compute MCC: TP or TN is zero or not defined") |
|---|
| 876 | r = None |
|---|
| 877 | |
|---|
| 878 | return r |
|---|
| 879 | |
|---|
| 880 | |
|---|
| 881 | @deprecated_keywords({"bIsListOfMatrices": "b_is_list_of_matrices"}) |
|---|
| 882 | def scotts_pi(confusion_matrix, b_is_list_of_matrices=True): |
|---|
| 883 | """Compute Scott's Pi for measuring inter-rater agreement for nominal data |
|---|
| 884 | |
|---|
| 885 | http://en.wikipedia.org/wiki/Scott%27s_Pi |
|---|
| 886 | Scott's Pi is a statistic for measuring inter-rater reliability for nominal |
|---|
| 887 | raters. |
|---|
| 888 | |
|---|
| 889 | @param confusion_matrix: confusion matrix, or list of confusion matrices. To obtain |
|---|
| 890 | non-binary confusion matrix, call |
|---|
| 891 | Orange.evaluation.scoring.compute_confusion_matrices and set the |
|---|
| 892 | classIndex parameter to -2. |
|---|
| 893 | @param b_is_list_of_matrices: specifies whether confm is list of matrices. |
|---|
| 894 | This function needs to operate on non-binary |
|---|
| 895 | confusion matrices, which are represented by python |
|---|
| 896 | lists, therefore one needs a way to distinguish |
|---|
| 897 | between a single matrix and list of matrices |
|---|
| 898 | """ |
|---|
| 899 | |
|---|
| 900 | if b_is_list_of_matrices: |
|---|
| 901 | try: |
|---|
| 902 | return [scotts_pi(cm, b_is_list_of_matrices=False) for cm in confusion_matrix] |
|---|
| 903 | except TypeError: |
|---|
| 904 | # Nevermind the parameter, maybe this is a "conventional" binary |
|---|
| 905 | # confusion matrix and bIsListOfMatrices was specified by mistake |
|---|
| 906 | return scottsPiSingle(confusion_matrix, bIsListOfMatrices=False) |
|---|
| 907 | else: |
|---|
| 908 | if isinstance(confusion_matrix, ConfusionMatrix): |
|---|
| 909 | confusion_matrix = numpy.array( [[confusion_matrix.TP, confusion_matrix.FN], |
|---|
| 910 | [confusion_matrix.FP, confusion_matrix.TN]], dtype=float) |
|---|
| 911 | else: |
|---|
| 912 | confusion_matrix = numpy.array(confusion_matrix, dtype=float) |
|---|
| 913 | |
|---|
| 914 | marginalSumOfRows = numpy.sum(confusion_matrix, axis=0) |
|---|
| 915 | marginalSumOfColumns = numpy.sum(confusion_matrix, axis=1) |
|---|
| 916 | jointProportion = (marginalSumOfColumns + marginalSumOfRows)/ \ |
|---|
| 917 | (2.0 * numpy.sum(confusion_matrix)) |
|---|
| 918 | # In the eq. above, 2.0 is what the Wikipedia page calls |
|---|
| 919 | # the number of annotators. Here we have two annotators: |
|---|
| 920 | # the observed (true) labels (annotations) and the predicted by |
|---|
| 921 | # the learners. |
|---|
| 922 | |
|---|
| 923 | prExpected = numpy.sum(jointProportion ** 2) |
|---|
| 924 | prActual = numpy.sum(numpy.diag(confusion_matrix)) /numpy.sum(confusion_matrix) |
|---|
| 925 | |
|---|
| 926 | ret = (prActual - prExpected) / (1.0 - prExpected) |
|---|
| 927 | return ret |
|---|
| 928 | |
|---|
| 929 | @deprecated_keywords({"classIndex": "class_index", |
|---|
| 930 | "unweighted": "ignore_weights"}) |
|---|
| 931 | def AUCWilcoxon(res, class_index=-1, ignore_weights=False, **argkw): |
|---|
| 932 | """ Computes the area under ROC (AUC) and its standard error using |
|---|
| 933 | Wilcoxon's approach proposed by Hanley and McNeal (1982). If |
|---|
| 934 | :obj:`classIndex` is not specified, the first class is used as |
|---|
| 935 | "the positive" and others are negative. The result is a list of |
|---|
| 936 | tuples (aROC, standard error). |
|---|
| 937 | |
|---|
| 938 | If test results consist of multiple folds, you need to split them using |
|---|
| 939 | :obj:`split_by_iterations` and perform this test on each fold separately. |
|---|
| 940 | """ |
|---|
| 941 | useweights = res.weights and not ignore_weights |
|---|
| 942 | problists, tots = corn.computeROCCumulative(res, class_index, useweights) |
|---|
| 943 | |
|---|
| 944 | results=[] |
|---|
| 945 | |
|---|
| 946 | totPos, totNeg = tots[1], tots[0] |
|---|
| 947 | N = totPos + totNeg |
|---|
| 948 | for plist in problists: |
|---|
| 949 | highPos, lowNeg = totPos, 0.0 |
|---|
| 950 | W, Q1, Q2 = 0.0, 0.0, 0.0 |
|---|
| 951 | for prob in plist: |
|---|
| 952 | thisPos, thisNeg = prob[1][1], prob[1][0] |
|---|
| 953 | highPos -= thisPos |
|---|
| 954 | W += thisNeg * (highPos + thisPos/2.) |
|---|
| 955 | Q2 += thisPos * (lowNeg**2 + lowNeg*thisNeg + thisNeg**2 /3.) |
|---|
| 956 | Q1 += thisNeg * (highPos**2 + highPos*thisPos + thisPos**2 /3.) |
|---|
| 957 | |
|---|
| 958 | lowNeg += thisNeg |
|---|
| 959 | |
|---|
| 960 | W /= (totPos*totNeg) |
|---|
| 961 | Q1 /= (totNeg*totPos**2) |
|---|
| 962 | Q2 /= (totPos*totNeg**2) |
|---|
| 963 | |
|---|
| 964 | SE = math.sqrt( (W*(1-W) + (totPos-1)*(Q1-W**2) + (totNeg-1)*(Q2-W**2)) / (totPos*totNeg) ) |
|---|
| 965 | results.append((W, SE)) |
|---|
| 966 | return results |
|---|
| 967 | |
|---|
| 968 | AROC = AUCWilcoxon # for backward compatibility, AROC is obsolote |
|---|
| 969 | |
|---|
| 970 | |
|---|
| 971 | @deprecated_keywords({"classIndex": "class_index", |
|---|
| 972 | "unweighted": "ignore_weights"}) |
|---|
| 973 | def compare_2_AUCs(res, lrn1, lrn2, class_index=-1, |
|---|
| 974 | ignore_weights=False, **argkw): |
|---|
| 975 | return corn.compare2ROCs(res, lrn1, lrn2, class_index, res.weights and not ignore_weights) |
|---|
| 976 | |
|---|
| 977 | compare_2_AROCs = compare_2_AUCs # for backward compatibility, compare_2_AROCs is obsolote |
|---|
| 978 | |
|---|
| 979 | |
|---|
| 980 | @deprecated_keywords({"classIndex": "class_index"}) |
|---|
| 981 | def compute_ROC(res, class_index=-1): |
|---|
| 982 | """ Computes a ROC curve as a list of (x, y) tuples, where x is |
|---|
| 983 | 1-specificity and y is sensitivity. |
|---|
| 984 | """ |
|---|
| 985 | problists, tots = corn.computeROCCumulative(res, class_index) |
|---|
| 986 | |
|---|
| 987 | results = [] |
|---|
| 988 | totPos, totNeg = tots[1], tots[0] |
|---|
| 989 | |
|---|
| 990 | for plist in problists: |
|---|
| 991 | curve=[(1., 1.)] |
|---|
| 992 | TP, TN = totPos, 0.0 |
|---|
| 993 | FN, FP = 0., totNeg |
|---|
| 994 | for prob in plist: |
|---|
| 995 | thisPos, thisNeg = prob[1][1], prob[1][0] |
|---|
| 996 | # thisPos go from TP to FN |
|---|
| 997 | TP -= thisPos |
|---|
| 998 | FN += thisPos |
|---|
| 999 | # thisNeg go from FP to TN |
|---|
| 1000 | TN += thisNeg |
|---|
| 1001 | FP -= thisNeg |
|---|
| 1002 | |
|---|
| 1003 | sens = TP/(TP+FN) |
|---|
| 1004 | spec = TN/(FP+TN) |
|---|
| 1005 | curve.append((1-spec, sens)) |
|---|
| 1006 | results.append(curve) |
|---|
| 1007 | |
|---|
| 1008 | return results |
|---|
| 1009 | |
|---|
| 1010 | ## TC's implementation of algorithms, taken from: |
|---|
| 1011 | ## T Fawcett: ROC Graphs: Notes and Practical Considerations for Data Mining Researchers, submitted to KDD Journal. |
|---|
| 1012 | def ROC_slope((P1x, P1y, P1fscore), (P2x, P2y, P2fscore)): |
|---|
| 1013 | if P1x == P2x: |
|---|
| 1014 | return 1e300 |
|---|
| 1015 | return (P1y - P2y) / (P1x - P2x) |
|---|
| 1016 | |
|---|
| 1017 | |
|---|
| 1018 | @deprecated_keywords({"keepConcavities": "keep_concavities"}) |
|---|
| 1019 | def ROC_add_point(P, R, keep_concavities=1): |
|---|
| 1020 | if keep_concavities: |
|---|
| 1021 | R.append(P) |
|---|
| 1022 | else: |
|---|
| 1023 | while True: |
|---|
| 1024 | if len(R) < 2: |
|---|
| 1025 | R.append(P) |
|---|
| 1026 | return R |
|---|
| 1027 | else: |
|---|
| 1028 | T = R.pop() |
|---|
| 1029 | T2 = R[-1] |
|---|
| 1030 | if ROC_slope(T2, T) > ROC_slope(T, P): |
|---|
| 1031 | R.append(T) |
|---|
| 1032 | R.append(P) |
|---|
| 1033 | return R |
|---|
| 1034 | return R |
|---|
| 1035 | |
|---|
| 1036 | |
|---|
| 1037 | @deprecated_keywords({"classIndex": "class_index", |
|---|
| 1038 | "keepConcavities": "keep_concavities"}) |
|---|
| 1039 | def TC_compute_ROC(res, class_index=-1, keep_concavities=1): |
|---|
| 1040 | problists, tots = corn.computeROCCumulative(res, class_index) |
|---|
| 1041 | |
|---|
| 1042 | results = [] |
|---|
| 1043 | P, N = tots[1], tots[0] |
|---|
| 1044 | |
|---|
| 1045 | for plist in problists: |
|---|
| 1046 | ## corn gives an increasing by scores list, we need a decreasing by scores |
|---|
| 1047 | plist.reverse() |
|---|
| 1048 | TP = 0.0 |
|---|
| 1049 | FP = 0.0 |
|---|
| 1050 | curve=[] |
|---|
| 1051 | fPrev = 10e300 # "infinity" score at 0.0, 0.0 |
|---|
| 1052 | for prob in plist: |
|---|
| 1053 | f = prob[0] |
|---|
| 1054 | if f <> fPrev: |
|---|
| 1055 | if P: |
|---|
| 1056 | tpr = TP/P |
|---|
| 1057 | else: |
|---|
| 1058 | tpr = 0.0 |
|---|
| 1059 | if N: |
|---|
| 1060 | fpr = FP/N |
|---|
| 1061 | else: |
|---|
| 1062 | fpr = 0.0 |
|---|
| 1063 | curve = ROC_add_point((fpr, tpr, fPrev), curve, keep_concavities) |
|---|
| 1064 | fPrev = f |
|---|
| 1065 | thisPos, thisNeg = prob[1][1], prob[1][0] |
|---|
| 1066 | TP += thisPos |
|---|
| 1067 | FP += thisNeg |
|---|
| 1068 | if P: |
|---|
| 1069 | tpr = TP/P |
|---|
| 1070 | else: |
|---|
| 1071 | tpr = 0.0 |
|---|
| 1072 | if N: |
|---|
| 1073 | fpr = FP/N |
|---|
| 1074 | else: |
|---|
| 1075 | fpr = 0.0 |
|---|
| 1076 | curve = ROC_add_point((fpr, tpr, f), curve, keep_concavities) ## ugly |
|---|
| 1077 | results.append(curve) |
|---|
| 1078 | |
|---|
| 1079 | return results |
|---|
| 1080 | |
|---|
| 1081 | ## returns a list of points at the intersection of the tangential iso-performance line and the given ROC curve |
|---|
| 1082 | ## for given values of FPcost, FNcost and pval |
|---|
| 1083 | def TC_best_thresholds_on_ROC_curve(FPcost, FNcost, pval, curve): |
|---|
| 1084 | m = (FPcost*(1.0 - pval)) / (FNcost*pval) |
|---|
| 1085 | |
|---|
| 1086 | ## put the iso-performance line in point (0.0, 1.0) |
|---|
| 1087 | x0, y0 = (0.0, 1.0) |
|---|
| 1088 | x1, y1 = (1.0, 1.0 + m) |
|---|
| 1089 | d01 = math.sqrt((x1 - x0)*(x1 - x0) + (y1 - y0)*(y1 - y0)) |
|---|
| 1090 | |
|---|
| 1091 | ## calculate and find the closest point to the line |
|---|
| 1092 | firstp = 1 |
|---|
| 1093 | mind = 0.0 |
|---|
| 1094 | a = (x0*y1 - x1*y0) |
|---|
| 1095 | closestPoints = [] |
|---|
| 1096 | for (x, y, fscore) in curve: |
|---|
| 1097 | d = ((y0 - y1)*x + (x1 - x0)*y + a) / d01 |
|---|
| 1098 | d = abs(d) |
|---|
| 1099 | if firstp or d < mind: |
|---|
| 1100 | mind, firstp = d, 0 |
|---|
| 1101 | closestPoints = [(x, y, fscore)] |
|---|
| 1102 | else: |
|---|
| 1103 | if abs(d - mind) <= 0.0001: ## close enough |
|---|
| 1104 | closestPoints.append( (x, y, fscore) ) |
|---|
| 1105 | return closestPoints |
|---|
| 1106 | |
|---|
| 1107 | def frange(start, end=None, inc=None): |
|---|
| 1108 | """A range function, that does accept float increments...""" |
|---|
| 1109 | |
|---|
| 1110 | if end is None: |
|---|
| 1111 | end = start + 0.0 |
|---|
| 1112 | start = 0.0 |
|---|
| 1113 | |
|---|
| 1114 | if inc is None or inc == 0: |
|---|
| 1115 | inc = 1.0 |
|---|
| 1116 | |
|---|
| 1117 | L = [start] |
|---|
| 1118 | while 1: |
|---|
| 1119 | next = start + len(L) * inc |
|---|
| 1120 | if inc > 0 and next >= end: |
|---|
| 1121 | L.append(end) |
|---|
| 1122 | break |
|---|
| 1123 | elif inc < 0 and next <= end: |
|---|
| 1124 | L.append(end) |
|---|
| 1125 | break |
|---|
| 1126 | L.append(next) |
|---|
| 1127 | |
|---|
| 1128 | return L |
|---|
| 1129 | |
|---|
| 1130 | ## input ROCcurves are of form [ROCcurves1, ROCcurves2, ... ROCcurvesN], |
|---|
| 1131 | ## where ROCcurvesX is a set of ROC curves, |
|---|
| 1132 | ## where a (one) ROC curve is a set of (FP, TP) points |
|---|
| 1133 | ## |
|---|
| 1134 | ## for each (sub)set of input ROC curves |
|---|
| 1135 | ## returns the average ROC curve and an array of (vertical) standard deviations |
|---|
| 1136 | @deprecated_keywords({"ROCcurves": "roc_curves"}) |
|---|
| 1137 | def TC_vertical_average_ROC(roc_curves, samples = 10): |
|---|
| 1138 | def INTERPOLATE((P1x, P1y, P1fscore), (P2x, P2y, P2fscore), X): |
|---|
| 1139 | if (P1x == P2x) or P1x < X > P2x or P1x > X < P2x: |
|---|
| 1140 | raise ValueError, "assumptions for interpolation are not met: P1 = %f,%f P2 = %f,%f X = %f" % (P1x, P1y, P2x, P2y, X) |
|---|
| 1141 | dx = float(P2x) - float(P1x) |
|---|
| 1142 | dy = float(P2y) - float(P1y) |
|---|
| 1143 | m = dy/dx |
|---|
| 1144 | return P1y + m*(X - P1x) |
|---|
| 1145 | |
|---|
| 1146 | def TP_FOR_FP(FPsample, ROC, npts): |
|---|
| 1147 | i = 0 |
|---|
| 1148 | while i < npts - 1: |
|---|
| 1149 | (fp, _, _) = ROC[i + 1] |
|---|
| 1150 | if fp <= FPsample: |
|---|
| 1151 | i += 1 |
|---|
| 1152 | else: |
|---|
| 1153 | break |
|---|
| 1154 | (fp, tp, _) = ROC[i] |
|---|
| 1155 | if fp == FPsample: |
|---|
| 1156 | return tp |
|---|
| 1157 | elif fp < FPsample and i + 1 < len(ROC): |
|---|
| 1158 | return INTERPOLATE(ROC[i], ROC[i+1], FPsample) |
|---|
| 1159 | elif fp < FPsample and i + 1 == len(ROC): # return the last |
|---|
| 1160 | return ROC[i][1] |
|---|
| 1161 | raise ValueError, "cannot compute: TP_FOR_FP in TC_vertical_average_ROC" |
|---|
| 1162 | #return 0.0 |
|---|
| 1163 | |
|---|
| 1164 | average = [] |
|---|
| 1165 | stdev = [] |
|---|
| 1166 | for ROCS in roc_curves: |
|---|
| 1167 | npts = [] |
|---|
| 1168 | for c in ROCS: |
|---|
| 1169 | npts.append(len(c)) |
|---|
| 1170 | nrocs = len(ROCS) |
|---|
| 1171 | |
|---|
| 1172 | TPavg = [] |
|---|
| 1173 | TPstd = [] |
|---|
| 1174 | for FPsample in frange(0.0, 1.0, 1.0/samples): |
|---|
| 1175 | TPsum = [] |
|---|
| 1176 | for i in range(nrocs): |
|---|
| 1177 | TPsum.append( TP_FOR_FP(FPsample, ROCS[i], npts[i]) ) ##TPsum = TPsum + TP_FOR_FP(FPsample, ROCS[i], npts[i]) |
|---|
| 1178 | TPavg.append( (FPsample, statc.mean(TPsum)) ) |
|---|
| 1179 | if len(TPsum) > 1: |
|---|
| 1180 | stdv = statc.std(TPsum) |
|---|
| 1181 | else: |
|---|
| 1182 | stdv = 0.0 |
|---|
| 1183 | TPstd.append( stdv ) |
|---|
| 1184 | |
|---|
| 1185 | average.append(TPavg) |
|---|
| 1186 | stdev.append(TPstd) |
|---|
| 1187 | |
|---|
| 1188 | return average, stdev |
|---|
| 1189 | |
|---|
| 1190 | ## input ROCcurves are of form [ROCcurves1, ROCcurves2, ... ROCcurvesN], |
|---|
| 1191 | ## where ROCcurvesX is a set of ROC curves, |
|---|
| 1192 | ## where a (one) ROC curve is a set of (FP, TP) points |
|---|
| 1193 | ## |
|---|
| 1194 | ## for each (sub)set of input ROC curves |
|---|
| 1195 | ## returns the average ROC curve, an array of vertical standard deviations and an array of horizontal standard deviations |
|---|
| 1196 | @deprecated_keywords({"ROCcurves": "roc_curves"}) |
|---|
| 1197 | def TC_threshold_average_ROC(roc_curves, samples = 10): |
|---|
| 1198 | def POINT_AT_THRESH(ROC, npts, thresh): |
|---|
| 1199 | i = 0 |
|---|
| 1200 | while i < npts - 1: |
|---|
| 1201 | (px, py, pfscore) = ROC[i] |
|---|
| 1202 | if pfscore > thresh: |
|---|
| 1203 | i += 1 |
|---|
| 1204 | else: |
|---|
| 1205 | break |
|---|
| 1206 | return ROC[i] |
|---|
| 1207 | |
|---|
| 1208 | average = [] |
|---|
| 1209 | stdevV = [] |
|---|
| 1210 | stdevH = [] |
|---|
| 1211 | for ROCS in roc_curves: |
|---|
| 1212 | npts = [] |
|---|
| 1213 | for c in ROCS: |
|---|
| 1214 | npts.append(len(c)) |
|---|
| 1215 | nrocs = len(ROCS) |
|---|
| 1216 | |
|---|
| 1217 | T = [] |
|---|
| 1218 | for c in ROCS: |
|---|
| 1219 | for (px, py, pfscore) in c: |
|---|
| 1220 | ## try: |
|---|
| 1221 | ## T.index(pfscore) |
|---|
| 1222 | ## except: |
|---|
| 1223 | T.append(pfscore) |
|---|
| 1224 | T.sort() |
|---|
| 1225 | T.reverse() ## ugly |
|---|
| 1226 | |
|---|
| 1227 | TPavg = [] |
|---|
| 1228 | TPstdV = [] |
|---|
| 1229 | TPstdH = [] |
|---|
| 1230 | for tidx in frange(0, (len(T) - 1.0), float(len(T))/samples): |
|---|
| 1231 | FPsum = [] |
|---|
| 1232 | TPsum = [] |
|---|
| 1233 | for i in range(nrocs): |
|---|
| 1234 | (fp, tp, _) = POINT_AT_THRESH(ROCS[i], npts[i], T[int(tidx)]) |
|---|
| 1235 | FPsum.append(fp) |
|---|
| 1236 | TPsum.append(tp) |
|---|
| 1237 | TPavg.append( (statc.mean(FPsum), statc.mean(TPsum)) ) |
|---|
| 1238 | ## vertical standard deviation |
|---|
| 1239 | if len(TPsum) > 1: |
|---|
| 1240 | stdv = statc.std(TPsum) |
|---|
| 1241 | else: |
|---|
| 1242 | stdv = 0.0 |
|---|
| 1243 | TPstdV.append( stdv ) |
|---|
| 1244 | ## horizontal standard deviation |
|---|
| 1245 | if len(FPsum) > 1: |
|---|
| 1246 | stdh = statc.std(FPsum) |
|---|
| 1247 | else: |
|---|
| 1248 | stdh = 0.0 |
|---|
| 1249 | TPstdH.append( stdh ) |
|---|
| 1250 | |
|---|
| 1251 | average.append(TPavg) |
|---|
| 1252 | stdevV.append(TPstdV) |
|---|
| 1253 | stdevH.append(TPstdH) |
|---|
| 1254 | |
|---|
| 1255 | return average, stdevV, stdevH |
|---|
| 1256 | |
|---|
| 1257 | ## Calibration Curve |
|---|
| 1258 | ## returns an array of (curve, yesClassPredictions, noClassPredictions) elements, where: |
|---|
| 1259 | ## - curve is an array of points (x, y) on the calibration curve |
|---|
| 1260 | ## - yesClassRugPoints is an array of (x, 1) points |
|---|
| 1261 | ## - noClassRugPoints is an array of (x, 0) points |
|---|
| 1262 | @deprecated_keywords({"classIndex": "class_index"}) |
|---|
| 1263 | def compute_calibration_curve(res, class_index=-1): |
|---|
| 1264 | ## merge multiple iterations into one |
|---|
| 1265 | mres = Orange.evaluation.testing.ExperimentResults(1, res.classifier_names, res.class_values, res.weights, classifiers=res.classifiers, loaded=res.loaded, test_type=res.test_type, labels=res.labels) |
|---|
| 1266 | for te in res.results: |
|---|
| 1267 | mres.results.append( te ) |
|---|
| 1268 | |
|---|
| 1269 | problists, tots = corn.computeROCCumulative(mres, class_index) |
|---|
| 1270 | |
|---|
| 1271 | results = [] |
|---|
| 1272 | |
|---|
| 1273 | bins = 10 ## divide interval between 0.0 and 1.0 into N bins |
|---|
| 1274 | |
|---|
| 1275 | for plist in problists: |
|---|
| 1276 | yesClassRugPoints = [] |
|---|
| 1277 | noClassRugPoints = [] |
|---|
| 1278 | |
|---|
| 1279 | yesBinsVals = [0] * bins |
|---|
| 1280 | noBinsVals = [0] * bins |
|---|
| 1281 | for (f, (thisNeg, thisPos)) in plist: |
|---|
| 1282 | yesClassRugPoints.append( (f, thisPos) ) #1.0 |
|---|
| 1283 | noClassRugPoints.append( (f, thisNeg) ) #1.0 |
|---|
| 1284 | |
|---|
| 1285 | index = int(f * bins ) |
|---|
| 1286 | index = min(index, bins - 1) ## just in case for value 1.0 |
|---|
| 1287 | yesBinsVals[index] += thisPos |
|---|
| 1288 | noBinsVals[index] += thisNeg |
|---|
| 1289 | |
|---|
| 1290 | curve = [] |
|---|
| 1291 | for cn in range(bins): |
|---|
| 1292 | f = float(cn * 1.0 / bins) + (1.0 / 2.0 / bins) |
|---|
| 1293 | yesVal = yesBinsVals[cn] |
|---|
| 1294 | noVal = noBinsVals[cn] |
|---|
| 1295 | allVal = yesVal + noVal |
|---|
| 1296 | if allVal == 0.0: continue |
|---|
| 1297 | y = float(yesVal)/float(allVal) |
|---|
| 1298 | curve.append( (f, y) ) |
|---|
| 1299 | |
|---|
| 1300 | ## smooth the curve |
|---|
| 1301 | maxnPoints = 100 |
|---|
| 1302 | if len(curve) >= 3: |
|---|
| 1303 | # loessCurve = statc.loess(curve, -3, 0.6) |
|---|
| 1304 | loessCurve = statc.loess(curve, maxnPoints, 0.5, 3) |
|---|
| 1305 | else: |
|---|
| 1306 | loessCurve = curve |
|---|
| 1307 | clen = len(loessCurve) |
|---|
| 1308 | if clen > maxnPoints: |
|---|
| 1309 | df = clen / maxnPoints |
|---|
| 1310 | if df < 1: df = 1 |
|---|
| 1311 | curve = [loessCurve[i] for i in range(0, clen, df)] |
|---|
| 1312 | else: |
|---|
| 1313 | curve = loessCurve |
|---|
| 1314 | curve = [c[:2] for c in curve] ## remove the third value (variance of epsilon?) that suddenly appeared in the output of the statc.loess function |
|---|
| 1315 | results.append((curve, yesClassRugPoints, noClassRugPoints)) |
|---|
| 1316 | |
|---|
| 1317 | return results |
|---|
| 1318 | |
|---|
| 1319 | |
|---|
| 1320 | ## Lift Curve |
|---|
| 1321 | ## returns an array of curve elements, where: |
|---|
| 1322 | ## - curve is an array of points ((TP+FP)/(P + N), TP/P, (th, FP/N)) on the Lift Curve |
|---|
| 1323 | @deprecated_keywords({"classIndex": "class_index"}) |
|---|
| 1324 | def compute_lift_curve(res, class_index=-1): |
|---|
| 1325 | ## merge multiple iterations into one |
|---|
| 1326 | mres = Orange.evaluation.testing.ExperimentResults(1, res.classifier_names, res.class_values, res.weights, classifiers=res.classifiers, loaded=res.loaded, test_type=res.test_type, labels=res.labels) |
|---|
| 1327 | for te in res.results: |
|---|
| 1328 | mres.results.append( te ) |
|---|
| 1329 | |
|---|
| 1330 | problists, tots = corn.computeROCCumulative(mres, class_index) |
|---|
| 1331 | |
|---|
| 1332 | results = [] |
|---|
| 1333 | P, N = tots[1], tots[0] |
|---|
| 1334 | for plist in problists: |
|---|
| 1335 | ## corn gives an increasing by scores list, we need a decreasing by scores |
|---|
| 1336 | plist.reverse() |
|---|
| 1337 | TP = 0.0 |
|---|
| 1338 | FP = 0.0 |
|---|
| 1339 | curve = [(0.0, 0.0, (10e300, 0.0))] |
|---|
| 1340 | for (f, (thisNeg, thisPos)) in plist: |
|---|
| 1341 | TP += thisPos |
|---|
| 1342 | FP += thisNeg |
|---|
| 1343 | curve.append( ((TP+FP)/(P + N), TP, (f, FP/(N or 1))) ) |
|---|
| 1344 | results.append(curve) |
|---|
| 1345 | |
|---|
| 1346 | return P, N, results |
|---|
| 1347 | ### |
|---|
| 1348 | |
|---|
| 1349 | class CDT: |
|---|
| 1350 | """ Stores number of concordant (C), discordant (D) and tied (T) pairs (used for AUC) """ |
|---|
| 1351 | def __init__(self, C=0.0, D=0.0, T=0.0): |
|---|
| 1352 | self.C, self.D, self.T = C, D, T |
|---|
| 1353 | |
|---|
| 1354 | def is_CDT_empty(cdt): |
|---|
| 1355 | return cdt.C + cdt.D + cdt.T < 1e-20 |
|---|
| 1356 | |
|---|
| 1357 | |
|---|
| 1358 | @deprecated_keywords({"classIndex": "class_index", |
|---|
| 1359 | "unweighted": "ignore_weights"}) |
|---|
| 1360 | def compute_CDT(res, class_index=-1, ignore_weights=False, **argkw): |
|---|
| 1361 | """Obsolete, don't use""" |
|---|
| 1362 | if class_index<0: |
|---|
| 1363 | if res.baseClass>=0: |
|---|
| 1364 | class_index = res.baseClass |
|---|
| 1365 | else: |
|---|
| 1366 | class_index = 1 |
|---|
| 1367 | |
|---|
| 1368 | useweights = res.weights and not ignore_weights |
|---|
| 1369 | weightByClasses = argkw.get("weightByClasses", True) |
|---|
| 1370 | |
|---|
| 1371 | if res.number_of_iterations>1: |
|---|
| 1372 | CDTs = [CDT() for _ in range(res.number_of_learners)] |
|---|
| 1373 | iterationExperiments = split_by_iterations(res) |
|---|
| 1374 | for exp in iterationExperiments: |
|---|
| 1375 | expCDTs = corn.computeCDT(exp, class_index, useweights) |
|---|
| 1376 | for i in range(len(CDTs)): |
|---|
| 1377 | CDTs[i].C += expCDTs[i].C |
|---|
| 1378 | CDTs[i].D += expCDTs[i].D |
|---|
| 1379 | CDTs[i].T += expCDTs[i].T |
|---|
| 1380 | for i in range(res.number_of_learners): |
|---|
| 1381 | if is_CDT_empty(CDTs[0]): |
|---|
| 1382 | return corn.computeCDT(res, class_index, useweights) |
|---|
| 1383 | |
|---|
| 1384 | return CDTs |
|---|
| 1385 | else: |
|---|
| 1386 | return corn.computeCDT(res, class_index, useweights) |
|---|
| 1387 | |
|---|
| 1388 | # Backward compatibility |
|---|
| 1389 | def replace_use_weights(fun): |
|---|
| 1390 | if environ.orange_no_deprecated_members: |
|---|
| 1391 | return fun |
|---|
| 1392 | |
|---|
| 1393 | @functools.wraps(fun) |
|---|
| 1394 | def wrapped(*args, **kwargs): |
|---|
| 1395 | use_weights = kwargs.pop("useWeights", None) |
|---|
| 1396 | if use_weights is not None: |
|---|
| 1397 | deprecation_warning("useWeights", "ignore_weights") |
|---|
| 1398 | kwargs["ignore_weights"] = not use_weights |
|---|
| 1399 | return fun(*args, **kwargs) |
|---|
| 1400 | return wrapped |
|---|
| 1401 | |
|---|
| 1402 | class AUC(list): |
|---|
| 1403 | """ |
|---|
| 1404 | Compute the area under ROC curve given a set of experimental results. |
|---|
| 1405 | For multivalued class problems, return the result of :obj:`by_weighted_pairs`. |
|---|
| 1406 | If testing consisted of multiple folds, each fold is scored and |
|---|
| 1407 | average score is returned. If a fold contains only instances with |
|---|
| 1408 | same class value, folds will be merged. |
|---|
| 1409 | |
|---|
| 1410 | :param test_results: test results to score |
|---|
| 1411 | :param ignore_weights: ignore instance weights when calculating score |
|---|
| 1412 | :param method: DEPRECATED, call the appropriate method directly. |
|---|
| 1413 | """ |
|---|
| 1414 | |
|---|
| 1415 | @replace_use_weights |
|---|
| 1416 | def __init__(self, test_results=None, method=0, ignore_weights=False): |
|---|
| 1417 | |
|---|
| 1418 | super(AUC, self).__init__() |
|---|
| 1419 | |
|---|
| 1420 | self.ignore_weights=ignore_weights |
|---|
| 1421 | self.method=method |
|---|
| 1422 | |
|---|
| 1423 | if test_results is not None: |
|---|
| 1424 | self.__call__(test_results) |
|---|
| 1425 | |
|---|
| 1426 | def __call__(self, test_results): |
|---|
| 1427 | if len(test_results.class_values) < 2: |
|---|
| 1428 | raise ValueError("Cannot compute AUC on a single-class problem") |
|---|
| 1429 | elif len(test_results.class_values) == 2: |
|---|
| 1430 | self._compute_for_binary_class(test_results) |
|---|
| 1431 | else: |
|---|
| 1432 | self._compute_for_multi_value_class(test_results, self.method) |
|---|
| 1433 | |
|---|
| 1434 | @classmethod |
|---|
| 1435 | def by_weighted_pairs(cls, res, ignore_weights=False): |
|---|
| 1436 | """ |
|---|
| 1437 | Compute AUC for each pair of classes (ignoring instances of all other |
|---|
| 1438 | classes) and averages the results, weighting them by the number of |
|---|
| 1439 | pairs of instances from these two classes (e.g. by the product of |
|---|
| 1440 | probabilities of the two classes). AUC computed in this way still |
|---|
| 1441 | behaves as concordance index, e.g., gives the probability that two |
|---|
| 1442 | randomly chosen instances from different classes will be correctly |
|---|
| 1443 | recognized (if the classifier knows from which two classes the |
|---|
| 1444 | instances came). |
|---|
| 1445 | """ |
|---|
| 1446 | auc = AUC(ignore_weights=ignore_weights) |
|---|
| 1447 | auc._compute_for_multi_value_class(res, method=cls.ByWeightedPairs) |
|---|
| 1448 | return auc |
|---|
| 1449 | |
|---|
| 1450 | @classmethod |
|---|
| 1451 | def by_pairs(cls, res, ignore_weights=False): |
|---|
| 1452 | """ |
|---|
| 1453 | Similar as above, except that the average over class pairs is not |
|---|
| 1454 | weighted. This AUC is, like the binary, independent of class |
|---|
| 1455 | distributions, but it is not related to concordance index any more. |
|---|
| 1456 | """ |
|---|
| 1457 | auc = AUC(ignore_weights=ignore_weights) |
|---|
| 1458 | auc._compute_for_multi_value_class(res, method=cls.ByPairs) |
|---|
| 1459 | return auc |
|---|
| 1460 | |
|---|
| 1461 | @classmethod |
|---|
| 1462 | def weighted_one_against_all(cls, res, ignore_weights=False): |
|---|
| 1463 | """ |
|---|
| 1464 | For each class, it computes AUC for this class against all others (that |
|---|
| 1465 | is, treating other classes as one class). The AUCs are then averaged by |
|---|
| 1466 | the class probabilities. This is related to concordance index in which |
|---|
| 1467 | we test the classifier's (average) capability for distinguishing the |
|---|
| 1468 | instances from a specified class from those that come from other classes. |
|---|
| 1469 | Unlike the binary AUC, the measure is not independent of class |
|---|
| 1470 | distributions. |
|---|
| 1471 | """ |
|---|
| 1472 | auc = AUC(ignore_weights=ignore_weights) |
|---|
| 1473 | auc._compute_for_multi_value_class(res, |
|---|
| 1474 | method=cls.WeightedOneAgainstAll) |
|---|
| 1475 | return auc |
|---|
| 1476 | |
|---|
| 1477 | @classmethod |
|---|
| 1478 | def one_against_all(cls, res, ignore_weights=False): |
|---|
| 1479 | """As above, except that the average is not weighted.""" |
|---|
| 1480 | auc = AUC(ignore_weights=ignore_weights) |
|---|
| 1481 | auc._compute_for_multi_value_class(res, method=cls.OneAgainstAll) |
|---|
| 1482 | return auc |
|---|
| 1483 | |
|---|
| 1484 | @classmethod |
|---|
| 1485 | def single_class(cls, res, class_index=-1, ignore_weights=False): |
|---|
| 1486 | """ |
|---|
| 1487 | Compute AUC where the class with the given class_index is singled |
|---|
| 1488 | out and all other classes are treated as a single class. |
|---|
| 1489 | """ |
|---|
| 1490 | if class_index<0: |
|---|
| 1491 | if res.base_class>=0: |
|---|
| 1492 | class_index = res.base_class |
|---|
| 1493 | else: |
|---|
| 1494 | class_index = 1 |
|---|
| 1495 | |
|---|
| 1496 | auc = AUC(ignore_weights=ignore_weights) |
|---|
| 1497 | auc._compute_for_single_class(res, class_index) |
|---|
| 1498 | return auc |
|---|
| 1499 | |
|---|
| 1500 | @classmethod |
|---|
| 1501 | def pair(cls, res, class_index1, class_index2, ignore_weights=False): |
|---|
| 1502 | """ |
|---|
| 1503 | Computes AUC between a pair of classes, ignoring instances from all |
|---|
| 1504 | other classes. |
|---|
| 1505 | """ |
|---|
| 1506 | auc = AUC(ignore_weights=ignore_weights) |
|---|
| 1507 | auc._compute_for_pair_of_classes(res, class_index1, class_index2) |
|---|
| 1508 | return auc |
|---|
| 1509 | |
|---|
| 1510 | @classmethod |
|---|
| 1511 | def matrix(cls, res, ignore_weights=False): |
|---|
| 1512 | """ |
|---|
| 1513 | Compute a (lower diagonal) matrix with AUCs for all pairs of classes. |
|---|
| 1514 | If there are empty classes, the corresponding elements in the matrix |
|---|
| 1515 | are -1. |
|---|
| 1516 | """ |
|---|
| 1517 | auc = AUC(ignore_weights=ignore_weights) |
|---|
| 1518 | auc._compute_matrix(res) |
|---|
| 1519 | return auc |
|---|
| 1520 | |
|---|
| 1521 | def _compute_for_binary_class(self, res): |
|---|
| 1522 | """AUC for binary classification problems""" |
|---|
| 1523 | if res.number_of_iterations > 1: |
|---|
| 1524 | return self._compute_for_multiple_folds( |
|---|
| 1525 | self._compute_one_class_against_all, |
|---|
| 1526 | split_by_iterations(res), |
|---|
| 1527 | (-1, res, res.number_of_iterations)) |
|---|
| 1528 | else: |
|---|
| 1529 | auc, _ = self._compute_one_class_against_all(res, -1) |
|---|
| 1530 | self[:] = auc |
|---|
| 1531 | return self |
|---|
| 1532 | |
|---|
| 1533 | def _compute_for_multi_value_class(self, res, method=0): |
|---|
| 1534 | """AUC for multiclass classification problems""" |
|---|
| 1535 | numberOfClasses = len(res.class_values) |
|---|
| 1536 | |
|---|
| 1537 | if res.number_of_iterations > 1: |
|---|
| 1538 | iterations = split_by_iterations(res) |
|---|
| 1539 | all_ite = res |
|---|
| 1540 | else: |
|---|
| 1541 | iterations = [res] |
|---|
| 1542 | all_ite = None |
|---|
| 1543 | |
|---|
| 1544 | # by pairs |
|---|
| 1545 | sum_aucs = [0.] * res.number_of_learners |
|---|
| 1546 | usefulClassPairs = 0. |
|---|
| 1547 | |
|---|
| 1548 | prob = None |
|---|
| 1549 | if method in [self.ByWeightedPairs, self.WeightedOneAgainstAll]: |
|---|
| 1550 | prob = class_probabilities_from_res(res) |
|---|
| 1551 | |
|---|
| 1552 | if method in [self.ByWeightedPairs, self.ByPairs]: |
|---|
| 1553 | for classIndex1 in range(numberOfClasses): |
|---|
| 1554 | for classIndex2 in range(classIndex1): |
|---|
| 1555 | subsum_aucs = self._compute_for_multiple_folds( |
|---|
| 1556 | self._compute_one_class_against_another, |
|---|
| 1557 | iterations, |
|---|
| 1558 | (classIndex1, classIndex2, |
|---|
| 1559 | all_ite, |
|---|
| 1560 | res.number_of_iterations)) |
|---|
| 1561 | if subsum_aucs: |
|---|
| 1562 | if method == self.ByWeightedPairs: |
|---|
| 1563 | p_ij = prob[classIndex1] * prob[classIndex2] |
|---|
| 1564 | subsum_aucs = [x * p_ij for x in subsum_aucs] |
|---|
| 1565 | usefulClassPairs += p_ij |
|---|
| 1566 | else: |
|---|
| 1567 | usefulClassPairs += 1 |
|---|
| 1568 | sum_aucs = map(add, sum_aucs, subsum_aucs) |
|---|
| 1569 | else: |
|---|
| 1570 | for classIndex in range(numberOfClasses): |
|---|
| 1571 | subsum_aucs = self._compute_for_multiple_folds( |
|---|
| 1572 | self._compute_one_class_against_all, |
|---|
| 1573 | iterations, (classIndex, all_ite, |
|---|
| 1574 | res.number_of_iterations)) |
|---|
| 1575 | if subsum_aucs: |
|---|
| 1576 | if method == self.ByWeightedPairs: |
|---|
| 1577 | p_i = prob[classIndex] |
|---|
| 1578 | subsum_aucs = [x * p_i for x in subsum_aucs] |
|---|
| 1579 | usefulClassPairs += p_i |
|---|
| 1580 | else: |
|---|
| 1581 | usefulClassPairs += 1 |
|---|
| 1582 | sum_aucs = map(add, sum_aucs, subsum_aucs) |
|---|
| 1583 | |
|---|
| 1584 | if usefulClassPairs > 0: |
|---|
| 1585 | sum_aucs = [x/usefulClassPairs for x in sum_aucs] |
|---|
| 1586 | |
|---|
| 1587 | self[:] = sum_aucs |
|---|
| 1588 | return self |
|---|
| 1589 | |
|---|
| 1590 | # computes the average AUC over folds using a "AUCcomputer" (AUC_i or AUC_ij) |
|---|
| 1591 | # it returns the sum of what is returned by the computer, unless at a certain |
|---|
| 1592 | # fold the computer has to resort to computing over all folds or even this failed; |
|---|
| 1593 | # in these cases the result is returned immediately |
|---|
| 1594 | @deprecated_keywords({"AUCcomputer": "auc_computer", |
|---|
| 1595 | "computerArgs": "computer_args"}) |
|---|
| 1596 | def _compute_for_multiple_folds(self, auc_computer, iterations, |
|---|
| 1597 | computer_args): |
|---|
| 1598 | """Compute the average AUC over folds using :obj:`auc_computer`.""" |
|---|
| 1599 | subsum_aucs = [0.] * iterations[0].number_of_learners |
|---|
| 1600 | for ite in iterations: |
|---|
| 1601 | aucs, foldsUsed = auc_computer(*(ite, ) + computer_args) |
|---|
| 1602 | if not aucs: |
|---|
| 1603 | return None |
|---|
| 1604 | if not foldsUsed: |
|---|
| 1605 | return aucs |
|---|
| 1606 | subsum_aucs = map(add, subsum_aucs, aucs) |
|---|
| 1607 | self[:] = subsum_aucs |
|---|
| 1608 | return self |
|---|
| 1609 | |
|---|
| 1610 | # Computes AUC; in multivalued class problem, AUC is computed as one against all |
|---|
| 1611 | # Results over folds are averages; if some folds examples from one class only, the folds are merged |
|---|
| 1612 | def _compute_for_single_class(self, res, class_index): |
|---|
| 1613 | if res.number_of_iterations > 1: |
|---|
| 1614 | self._compute_for_multiple_folds( |
|---|
| 1615 | self._compute_one_class_against_all, split_by_iterations(res), |
|---|
| 1616 | (class_index, res, res.number_of_iterations)) |
|---|
| 1617 | else: |
|---|
| 1618 | self._compute_one_class_against_all(res, class_index) |
|---|
| 1619 | |
|---|
| 1620 | # Computes AUC for a pair of classes (as if there were no other classes) |
|---|
| 1621 | # Results over folds are averages; if some folds have examples from one class only, the folds are merged |
|---|
| 1622 | def _compute_for_pair_of_classes(self, res, class_index1, class_index2): |
|---|
| 1623 | if res.number_of_iterations > 1: |
|---|
| 1624 | self._compute_for_multiple_folds(self._compute_one_class_against_another, |
|---|
| 1625 | split_by_iterations(res), |
|---|
| 1626 | (class_index1, class_index2, res, res.number_of_iterations)) |
|---|
| 1627 | else: |
|---|
| 1628 | self._compute_one_class_against_another(res, class_index1, class_index2) |
|---|
| 1629 | |
|---|
| 1630 | # computes AUC between class i and the other classes (treating them as the same class) |
|---|
| 1631 | @deprecated_keywords({"classIndex": "class_index", |
|---|
| 1632 | "divideByIfIte": "divide_by_if_ite"}) |
|---|
| 1633 | def _compute_one_class_against_all(self, ite, class_index, all_ite=None, |
|---|
| 1634 | divide_by_if_ite=1.0): |
|---|
| 1635 | """Compute AUC between class i and all the other classes)""" |
|---|
| 1636 | return self._compute_auc(corn.computeCDT, ite, all_ite, divide_by_if_ite, |
|---|
| 1637 | (class_index, not self.ignore_weights)) |
|---|
| 1638 | |
|---|
| 1639 | |
|---|
| 1640 | # computes AUC between classes i and j as if there are no other classes |
|---|
| 1641 | def _compute_one_class_against_another(self, ite, class_index1, |
|---|
| 1642 | class_index2, all_ite=None, |
|---|
| 1643 | divide_by_if_ite=1.0): |
|---|
| 1644 | """ |
|---|
| 1645 | Compute AUC between classes i and j as if there are no other classes. |
|---|
| 1646 | """ |
|---|
| 1647 | return self._compute_auc(corn.computeCDTPair, ite, all_ite, divide_by_if_ite, |
|---|
| 1648 | (class_index1, class_index2, not self.ignore_weights)) |
|---|
| 1649 | |
|---|
| 1650 | # computes AUC using a specified 'cdtComputer' function |
|---|
| 1651 | # It tries to compute AUCs from 'ite' (examples from a single iteration) and, |
|---|
| 1652 | # if C+D+T=0, from 'all_ite' (entire test set). In the former case, the AUCs |
|---|
| 1653 | # are divided by 'divideByIfIte'. Additional flag is returned which is True in |
|---|
| 1654 | # the former case, or False in the latter. |
|---|
| 1655 | @deprecated_keywords({"cdt_computer": "cdtComputer", |
|---|
| 1656 | "divideByIfIte": "divide_by_if_ite", |
|---|
| 1657 | "computerArgs": "computer_args"}) |
|---|
| 1658 | def _compute_auc(self, cdt_computer, ite, all_ite, divide_by_if_ite, |
|---|
| 1659 | computer_args): |
|---|
| 1660 | """ |
|---|
| 1661 | Compute AUC using a :obj:`cdt_computer`. |
|---|
| 1662 | """ |
|---|
| 1663 | cdts = cdt_computer(*(ite, ) + computer_args) |
|---|
| 1664 | if not is_CDT_empty(cdts[0]): |
|---|
| 1665 | return [(cdt.C+cdt.T/2)/(cdt.C+cdt.D+cdt.T)/divide_by_if_ite for cdt in cdts], True |
|---|
| 1666 | |
|---|
| 1667 | if all_ite: |
|---|
| 1668 | cdts = cdt_computer(*(all_ite, ) + computer_args) |
|---|
| 1669 | if not is_CDT_empty(cdts[0]): |
|---|
| 1670 | return [(cdt.C+cdt.T/2)/(cdt.C+cdt.D+cdt.T) for cdt in cdts], False |
|---|
| 1671 | |
|---|
| 1672 | return False, False |
|---|
| 1673 | |
|---|
| 1674 | def _compute_matrix(self, res): |
|---|
| 1675 | numberOfClasses = len(res.class_values) |
|---|
| 1676 | number_of_learners = res.number_of_learners |
|---|
| 1677 | if res.number_of_iterations > 1: |
|---|
| 1678 | iterations, all_ite = split_by_iterations(res), res |
|---|
| 1679 | else: |
|---|
| 1680 | iterations, all_ite = [res], None |
|---|
| 1681 | aucs = [[[] for _ in range(numberOfClasses)] for _ in |
|---|
| 1682 | range(number_of_learners)] |
|---|
| 1683 | for classIndex1 in range(numberOfClasses): |
|---|
| 1684 | for classIndex2 in range(classIndex1): |
|---|
| 1685 | pair_aucs = self._compute_for_multiple_folds( |
|---|
| 1686 | self._compute_one_class_against_another, |
|---|
| 1687 | iterations, (classIndex1, |
|---|
| 1688 | classIndex2, |
|---|
| 1689 | all_ite, |
|---|
| 1690 | res.number_of_iterations)) |
|---|
| 1691 | if pair_aucs: |
|---|
| 1692 | for lrn in range(number_of_learners): |
|---|
| 1693 | aucs[lrn][classIndex1].append(pair_aucs[lrn]) |
|---|
| 1694 | else: |
|---|
| 1695 | for lrn in range(number_of_learners): |
|---|
| 1696 | aucs[lrn][classIndex1].append(-1) |
|---|
| 1697 | self[:] = aucs |
|---|
| 1698 | return aucs |
|---|
| 1699 | |
|---|
| 1700 | #Backward compatibility |
|---|
| 1701 | AUC.ByWeightedPairs = 0 |
|---|
| 1702 | AUC.ByPairs = 1 |
|---|
| 1703 | AUC.WeightedOneAgainstAll = 2 |
|---|
| 1704 | AUC.OneAgainstAll = 3 |
|---|
| 1705 | |
|---|
| 1706 | @replace_use_weights |
|---|
| 1707 | def AUC_binary(res, ignore_weights=False): |
|---|
| 1708 | auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights) |
|---|
| 1709 | auc._compute_for_binary_class(res) |
|---|
| 1710 | return auc |
|---|
| 1711 | |
|---|
| 1712 | @replace_use_weights |
|---|
| 1713 | def AUC_multi(res, ignore_weights=False, method = 0): |
|---|
| 1714 | auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights, |
|---|
| 1715 | method=method) |
|---|
| 1716 | auc._compute_for_multi_value_class(res) |
|---|
| 1717 | return auc |
|---|
| 1718 | |
|---|
| 1719 | def AUC_iterations(auc_computer, iterations, computer_args): |
|---|
| 1720 | auc = deprecated_function_name(AUC)() |
|---|
| 1721 | auc._compute_for_multiple_folds(auc_computer, iterations, computer_args) |
|---|
| 1722 | return auc |
|---|
| 1723 | |
|---|
| 1724 | def AUC_x(cdtComputer, ite, all_ite, divide_by_if_ite, computer_args): |
|---|
| 1725 | auc = deprecated_function_name(AUC)() |
|---|
| 1726 | result = auc._compute_auc(cdtComputer, ite, all_ite, divide_by_if_ite, |
|---|
| 1727 | computer_args) |
|---|
| 1728 | return result |
|---|
| 1729 | |
|---|
| 1730 | @replace_use_weights |
|---|
| 1731 | def AUC_i(ite, class_index, ignore_weights=False, all_ite=None, |
|---|
| 1732 | divide_by_if_ite=1.0): |
|---|
| 1733 | auc = deprecated_function_name(AUC)() |
|---|
| 1734 | result = auc._compute_one_class_against_another(ite, class_index, |
|---|
| 1735 | all_ite=None, divide_by_if_ite=1.0) |
|---|
| 1736 | return result |
|---|
| 1737 | |
|---|
| 1738 | |
|---|
| 1739 | @replace_use_weights |
|---|
| 1740 | def AUC_ij(ite, class_index1, class_index2, ignore_weights=False, |
|---|
| 1741 | all_ite = None, divide_by_if_ite = 1.0): |
|---|
| 1742 | auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights) |
|---|
| 1743 | result = auc._compute_one_class_against_another( |
|---|
| 1744 | ite, class_index1, class_index2, all_ite = None, divide_by_if_ite = 1.0) |
|---|
| 1745 | return result |
|---|
| 1746 | |
|---|
| 1747 | |
|---|
| 1748 | |
|---|
| 1749 | |
|---|
| 1750 | #AUC_binary = replace_use_weights(deprecated_function_name(AUC()._compute_for_binary_class)) |
|---|
| 1751 | #AUC_multi = replace_use_weights(deprecated_function_name(AUC._compute_for_multi_value_class)) |
|---|
| 1752 | #AUC_iterations = replace_use_weights(deprecated_function_name(AUC._compute_for_multiple_folds)) |
|---|
| 1753 | #AUC_x = replace_use_weights(deprecated_function_name(AUC._compute_auc)) |
|---|
| 1754 | #AUC_i = replace_use_weights(deprecated_function_name(AUC._compute_one_class_against_all)) |
|---|
| 1755 | #AUC_ij = replace_use_weights(deprecated_function_name(AUC._compute_one_class_against_another)) |
|---|
| 1756 | |
|---|
| 1757 | AUC_single = replace_use_weights( |
|---|
| 1758 | deprecated_keywords({"classIndex": "class_index"})( |
|---|
| 1759 | deprecated_function_name(AUC.single_class))) |
|---|
| 1760 | AUC_pair = replace_use_weights( |
|---|
| 1761 | deprecated_keywords({"classIndex1": "class_index1", |
|---|
| 1762 | "classIndex2": "class_index2"})( |
|---|
| 1763 | deprecated_function_name(AUC.pair))) |
|---|
| 1764 | AUC_matrix = replace_use_weights(deprecated_function_name(AUC.matrix)) |
|---|
| 1765 | |
|---|
| 1766 | |
|---|
| 1767 | @deprecated_keywords({"unweighted": "ignore_weights"}) |
|---|
| 1768 | def McNemar(res, ignore_weights=False, **argkw): |
|---|
| 1769 | """ Computes a triangular matrix with McNemar statistics for each pair of |
|---|
| 1770 | classifiers. The statistics is distributed by chi-square distribution with |
|---|
| 1771 | one degree of freedom; critical value for 5% significance is around 3.84. |
|---|
| 1772 | """ |
|---|
| 1773 | nLearners = res.number_of_learners |
|---|
| 1774 | mcm = [] |
|---|
| 1775 | for i in range(nLearners): |
|---|
| 1776 | mcm.append([0.0]*res.number_of_learners) |
|---|
| 1777 | |
|---|
| 1778 | if not res.weights or ignore_weights: |
|---|
| 1779 | for i in res.results: |
|---|
| 1780 | actual = i.actual_class |
|---|
| 1781 | classes = i.classes |
|---|
| 1782 | for l1 in range(nLearners): |
|---|
| 1783 | for l2 in range(l1, nLearners): |
|---|
| 1784 | if classes[l1]==actual: |
|---|
| 1785 | if classes[l2]!=actual: |
|---|
| 1786 | mcm[l1][l2] += 1 |
|---|
| 1787 | elif classes[l2]==actual: |
|---|
| 1788 | mcm[l2][l1] += 1 |
|---|
| 1789 | else: |
|---|
| 1790 | for i in res.results: |
|---|
| 1791 | actual = i.actual_class |
|---|
| 1792 | classes = i.classes |
|---|
| 1793 | for l1 in range(nLearners): |
|---|
| 1794 | for l2 in range(l1, nLearners): |
|---|
| 1795 | if classes[l1]==actual: |
|---|
| 1796 | if classes[l2]!=actual: |
|---|
| 1797 | mcm[l1][l2] += i.weight |
|---|
| 1798 | elif classes[l2]==actual: |
|---|
| 1799 | mcm[l2][l1] += i.weight |
|---|
| 1800 | |
|---|
| 1801 | for l1 in range(nLearners): |
|---|
| 1802 | for l2 in range(l1, nLearners): |
|---|
| 1803 | su=mcm[l1][l2] + mcm[l2][l1] |
|---|
| 1804 | if su: |
|---|
| 1805 | mcm[l2][l1] = (abs(mcm[l1][l2]-mcm[l2][l1])-1)**2 / su |
|---|
| 1806 | else: |
|---|
| 1807 | mcm[l2][l1] = 0 |
|---|
| 1808 | |
|---|
| 1809 | for l1 in range(nLearners): |
|---|
| 1810 | mcm[l1]=mcm[l1][:l1] |
|---|
| 1811 | |
|---|
| 1812 | return mcm |
|---|
| 1813 | |
|---|
| 1814 | |
|---|
| 1815 | def McNemar_of_two(res, lrn1, lrn2, ignore_weights=False): |
|---|
| 1816 | """ McNemar_of_two computes a McNemar statistics for a pair of classifier, |
|---|
| 1817 | specified by indices learner1 and learner2. |
|---|
| 1818 | """ |
|---|
| 1819 | tf = ft = 0.0 |
|---|
| 1820 | if not res.weights or ignore_weights: |
|---|
| 1821 | for i in res.results: |
|---|
| 1822 | actual=i.actual_class |
|---|
| 1823 | if i.classes[lrn1]==actual: |
|---|
| 1824 | if i.classes[lrn2]!=actual: |
|---|
| 1825 | tf += i.weight |
|---|
| 1826 | elif i.classes[lrn2]==actual: |
|---|
| 1827 | ft += i.weight |
|---|
| 1828 | else: |
|---|
| 1829 | for i in res.results: |
|---|
| 1830 | actual=i.actual_class |
|---|
| 1831 | if i.classes[lrn1]==actual: |
|---|
| 1832 | if i.classes[lrn2]!=actual: |
|---|
| 1833 | tf += 1.0 |
|---|
| 1834 | elif i.classes[lrn2]==actual: |
|---|
| 1835 | ft += 1.0 |
|---|
| 1836 | |
|---|
| 1837 | su = tf + ft |
|---|
| 1838 | if su: |
|---|
| 1839 | return (abs(tf-ft)-1)**2 / su |
|---|
| 1840 | else: |
|---|
| 1841 | return 0 |
|---|
| 1842 | |
|---|
| 1843 | |
|---|
| 1844 | def Friedman(res, stat=CA): |
|---|
| 1845 | """ Compares classifiers by Friedman test, treating folds as different examles. |
|---|
| 1846 | Returns F, p and average ranks |
|---|
| 1847 | """ |
|---|
| 1848 | res_split = split_by_iterations(res) |
|---|
| 1849 | res = [stat(r) for r in res_split] |
|---|
| 1850 | |
|---|
| 1851 | N = len(res) |
|---|
| 1852 | k = len(res[0]) |
|---|
| 1853 | sums = [0.0]*k |
|---|
| 1854 | for r in res: |
|---|
| 1855 | ranks = [k-x+1 for x in statc.rankdata(r)] |
|---|
| 1856 | if stat==Brier_score: # reverse ranks for Brier_score (lower better) |
|---|
| 1857 | ranks = [k+1-x for x in ranks] |
|---|
| 1858 | sums = map(add, ranks, sums) |
|---|
| 1859 | |
|---|
| 1860 | T = sum(x*x for x in sums) |
|---|
| 1861 | sums = [x/N for x in sums] |
|---|
| 1862 | |
|---|
| 1863 | F = 12.0 / (N*k*(k+1)) * T - 3 * N * (k+1) |
|---|
| 1864 | |
|---|
| 1865 | return F, statc.chisqprob(F, k-1), sums |
|---|
| 1866 | |
|---|
| 1867 | |
|---|
| 1868 | def Wilcoxon_pairs(res, avgranks, stat=CA): |
|---|
| 1869 | """ Returns a triangular matrix, where element[i][j] stores significance of difference |
|---|
| 1870 | between i-th and j-th classifier, as computed by Wilcoxon test. The element is positive |
|---|
| 1871 | if i-th is better than j-th, negative if it is worse, and 1 if they are equal. |
|---|
| 1872 | Arguments to function are ExperimentResults, average ranks (as returned by Friedman) |
|---|
| 1873 | and, optionally, a statistics; greater values should mean better results.append |
|---|
| 1874 | """ |
|---|
| 1875 | res_split = split_by_iterations(res) |
|---|
| 1876 | res = [stat(r) for r in res_split] |
|---|
| 1877 | |
|---|
| 1878 | k = len(res[0]) |
|---|
| 1879 | bt = [] |
|---|
| 1880 | for m1 in range(k): |
|---|
| 1881 | nl = [] |
|---|
| 1882 | for m2 in range(m1+1, k): |
|---|
| 1883 | t, p = statc.wilcoxont([r[m1] for r in res], [r[m2] for r in res]) |
|---|
| 1884 | if avgranks[m1]<avgranks[m2]: |
|---|
| 1885 | nl.append(p) |
|---|
| 1886 | elif avgranks[m2]<avgranks[m1]: |
|---|
| 1887 | nl.append(-p) |
|---|
| 1888 | else: |
|---|
| 1889 | nl.append(1) |
|---|
| 1890 | bt.append(nl) |
|---|
| 1891 | return bt |
|---|
| 1892 | |
|---|
| 1893 | |
|---|
| 1894 | @deprecated_keywords({"allResults": "all_results", |
|---|
| 1895 | "noConfidence": "no_confidence"}) |
|---|
| 1896 | def plot_learning_curve_learners(file, all_results, proportions, learners, no_confidence=0): |
|---|
| 1897 | plot_learning_curve(file, all_results, proportions, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))], no_confidence) |
|---|
| 1898 | |
|---|
| 1899 | |
|---|
| 1900 | @deprecated_keywords({"allResults": "all_results", |
|---|
| 1901 | "noConfidence": "no_confidence"}) |
|---|
| 1902 | def plot_learning_curve(file, all_results, proportions, legend, no_confidence=0): |
|---|
| 1903 | import types |
|---|
| 1904 | fopened=0 |
|---|
| 1905 | if type(file)==types.StringType: |
|---|
| 1906 | file=open(file, "wt") |
|---|
| 1907 | fopened=1 |
|---|
| 1908 | |
|---|
| 1909 | file.write("set yrange [0:1]\n") |
|---|
| 1910 | file.write("set xrange [%f:%f]\n" % (proportions[0], proportions[-1])) |
|---|
| 1911 | file.write("set multiplot\n\n") |
|---|
| 1912 | CAs = [CA(x, report_se=True) for x in all_results] |
|---|
| 1913 | |
|---|
| 1914 | file.write("plot \\\n") |
|---|
| 1915 | for i in range(len(legend)-1): |
|---|
| 1916 | if not no_confidence: |
|---|
| 1917 | file.write("'-' title '' with yerrorbars pointtype %i,\\\n" % (i+1)) |
|---|
| 1918 | file.write("'-' title '%s' with linespoints pointtype %i,\\\n" % (legend[i], i+1)) |
|---|
| 1919 | if not no_confidence: |
|---|
| 1920 | file.write("'-' title '' with yerrorbars pointtype %i,\\\n" % (len(legend))) |
|---|
| 1921 | file.write("'-' title '%s' with linespoints pointtype %i\n" % (legend[-1], len(legend))) |
|---|
| 1922 | |
|---|
| 1923 | for i in range(len(legend)): |
|---|
| 1924 | if not no_confidence: |
|---|
| 1925 | for p in range(len(proportions)): |
|---|
| 1926 | file.write("%f\t%f\t%f\n" % (proportions[p], CAs[p][i][0], 1.96*CAs[p][i][1])) |
|---|
| 1927 | file.write("e\n\n") |
|---|
| 1928 | |
|---|
| 1929 | for p in range(len(proportions)): |
|---|
| 1930 | file.write("%f\t%f\n" % (proportions[p], CAs[p][i][0])) |
|---|
| 1931 | file.write("e\n\n") |
|---|
| 1932 | |
|---|
| 1933 | if fopened: |
|---|
| 1934 | file.close() |
|---|
| 1935 | |
|---|
| 1936 | |
|---|
| 1937 | def print_single_ROC_curve_coordinates(file, curve): |
|---|
| 1938 | import types |
|---|
| 1939 | fopened=0 |
|---|
| 1940 | if type(file)==types.StringType: |
|---|
| 1941 | file=open(file, "wt") |
|---|
| 1942 | fopened=1 |
|---|
| 1943 | |
|---|
| 1944 | for coord in curve: |
|---|
| 1945 | file.write("%5.3f\t%5.3f\n" % tuple(coord)) |
|---|
| 1946 | |
|---|
| 1947 | if fopened: |
|---|
| 1948 | file.close() |
|---|
| 1949 | |
|---|
| 1950 | |
|---|
| 1951 | def plot_ROC_learners(file, curves, learners): |
|---|
| 1952 | plot_ROC(file, curves, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))]) |
|---|
| 1953 | |
|---|
| 1954 | def plot_ROC(file, curves, legend): |
|---|
| 1955 | import types |
|---|
| 1956 | fopened=0 |
|---|
| 1957 | if type(file)==types.StringType: |
|---|
| 1958 | file=open(file, "wt") |
|---|
| 1959 | fopened=1 |
|---|
| 1960 | |
|---|
| 1961 | file.write("set yrange [0:1]\n") |
|---|
| 1962 | file.write("set xrange [0:1]\n") |
|---|
| 1963 | file.write("set multiplot\n\n") |
|---|
| 1964 | |
|---|
| 1965 | file.write("plot \\\n") |
|---|
| 1966 | for leg in legend: |
|---|
| 1967 | file.write("'-' title '%s' with lines,\\\n" % leg) |
|---|
| 1968 | file.write("'-' title '' with lines\n") |
|---|
| 1969 | |
|---|
| 1970 | for curve in curves: |
|---|
| 1971 | for coord in curve: |
|---|
| 1972 | file.write("%5.3f\t%5.3f\n" % tuple(coord)) |
|---|
| 1973 | file.write("e\n\n") |
|---|
| 1974 | |
|---|
| 1975 | file.write("1.0\t1.0\n0.0\t0.0e\n\n") |
|---|
| 1976 | |
|---|
| 1977 | if fopened: |
|---|
| 1978 | file.close() |
|---|
| 1979 | |
|---|
| 1980 | |
|---|
| 1981 | @deprecated_keywords({"allResults": "all_results"}) |
|---|
| 1982 | def plot_McNemar_curve_learners(file, all_results, proportions, learners, reference=-1): |
|---|
| 1983 | plot_McNemar_curve(file, all_results, proportions, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))], reference) |
|---|
| 1984 | |
|---|
| 1985 | |
|---|
| 1986 | @deprecated_keywords({"allResults": "all_results"}) |
|---|
| 1987 | def plot_McNemar_curve(file, all_results, proportions, legend, reference=-1): |
|---|
| 1988 | if reference<0: |
|---|
| 1989 | reference=len(legend)-1 |
|---|
| 1990 | |
|---|
| 1991 | import types |
|---|
| 1992 | fopened=0 |
|---|
| 1993 | if type(file)==types.StringType: |
|---|
| 1994 | file=open(file, "wt") |
|---|
| 1995 | fopened=1 |
|---|
| 1996 | |
|---|
| 1997 | #file.write("set yrange [0:1]\n") |
|---|
| 1998 | #file.write("set xrange [%f:%f]\n" % (proportions[0], proportions[-1])) |
|---|
| 1999 | file.write("set multiplot\n\n") |
|---|
| 2000 | file.write("plot \\\n") |
|---|
| 2001 | tmap=range(reference)+range(reference+1, len(legend)) |
|---|
| 2002 | for i in tmap[:-1]: |
|---|
| 2003 | file.write("'-' title '%s' with linespoints pointtype %i,\\\n" % (legend[i], i+1)) |
|---|
| 2004 | file.write("'-' title '%s' with linespoints pointtype %i\n" % (legend[tmap[-1]], tmap[-1])) |
|---|
| 2005 | file.write("\n") |
|---|
| 2006 | |
|---|
| 2007 | for i in tmap: |
|---|
| 2008 | for p in range(len(proportions)): |
|---|
| 2009 | file.write("%f\t%f\n" % (proportions[p], McNemar_of_two(all_results[p], i, reference))) |
|---|
| 2010 | file.write("e\n\n") |
|---|
| 2011 | |
|---|
| 2012 | if fopened: |
|---|
| 2013 | file.close() |
|---|
| 2014 | |
|---|
| 2015 | default_point_types=("{$\\circ$}", "{$\\diamond$}", "{$+$}", "{$\\times$}", "{$|$}")+tuple([chr(x) for x in range(97, 122)]) |
|---|
| 2016 | default_line_types=("\\setsolid", "\\setdashpattern <4pt, 2pt>", "\\setdashpattern <8pt, 2pt>", "\\setdashes", "\\setdots") |
|---|
| 2017 | |
|---|
| 2018 | @deprecated_keywords({"allResults": "all_results"}) |
|---|
| 2019 | def learning_curve_learners_to_PiCTeX(file, all_results, proportions, **options): |
|---|
| 2020 | return apply(learning_curve_to_PiCTeX, (file, all_results, proportions), options) |
|---|
| 2021 | |
|---|
| 2022 | |
|---|
| 2023 | @deprecated_keywords({"allResults": "all_results"}) |
|---|
| 2024 | def learning_curve_to_PiCTeX(file, all_results, proportions, **options): |
|---|
| 2025 | import types |
|---|
| 2026 | fopened=0 |
|---|
| 2027 | if type(file)==types.StringType: |
|---|
| 2028 | file=open(file, "wt") |
|---|
| 2029 | fopened=1 |
|---|
| 2030 | |
|---|
| 2031 | nexamples=len(all_results[0].results) |
|---|
| 2032 | CAs = [CA(x, report_se=True) for x in all_results] |
|---|
| 2033 | |
|---|
| 2034 | graphsize=float(options.get("graphsize", 10.0)) #cm |
|---|
| 2035 | difprop=proportions[-1]-proportions[0] |
|---|
| 2036 | ntestexamples=nexamples*proportions[-1] |
|---|
| 2037 | xunit=graphsize/ntestexamples |
|---|
| 2038 | |
|---|
| 2039 | yshift=float(options.get("yshift", -ntestexamples/20.)) |
|---|
| 2040 | |
|---|
| 2041 | pointtypes=options.get("pointtypes", default_point_types) |
|---|
| 2042 | linetypes=options.get("linetypes", default_line_types) |
|---|
| 2043 | |
|---|
| 2044 | if options.has_key("numberedx"): |
|---|
| 2045 | numberedx=options["numberedx"] |
|---|
| 2046 | if type(numberedx)==types.IntType: |
|---|
| 2047 | if numberedx>0: |
|---|
| 2048 | numberedx=[nexamples*proportions[int(i/float(numberedx)*len(proportions))] for i in range(numberedx)]+[proportions[-1]*nexamples] |
|---|
| 2049 | elif numberedx<0: |
|---|
| 2050 | numberedx = -numberedx |
|---|
| 2051 | newn=[] |
|---|
| 2052 | for i in range(numberedx+1): |
|---|
| 2053 | wanted=proportions[0]+float(i)/numberedx*difprop |
|---|
| 2054 | best=(10, 0) |
|---|
| 2055 | for t in proportions: |
|---|
| 2056 | td=abs(wanted-t) |
|---|
| 2057 | if td<best[0]: |
|---|
| 2058 | best=(td, t) |
|---|
| 2059 | if not best[1] in newn: |
|---|
| 2060 | newn.append(best[1]) |
|---|
| 2061 | newn.sort() |
|---|
| 2062 | numberedx=[nexamples*x for x in newn] |
|---|
| 2063 | elif type(numberedx[0])==types.FloatType: |
|---|
| 2064 | numberedx=[nexamples*x for x in numberedx] |
|---|
| 2065 | else: |
|---|
| 2066 | numberedx=[nexamples*x for x in proportions] |
|---|
| 2067 | |
|---|
| 2068 | file.write("\\mbox{\n") |
|---|
| 2069 | file.write(" \\beginpicture\n") |
|---|
| 2070 | file.write(" \\setcoordinatesystem units <%10.8fcm, %5.3fcm>\n\n" % (xunit, graphsize)) |
|---|
| 2071 | file.write(" \\setplotarea x from %5.3f to %5.3f, y from 0 to 1\n" % (0, ntestexamples)) |
|---|
| 2072 | file.write(" \\axis bottom invisible\n")# label {#examples}\n") |
|---|
| 2073 | file.write(" ticks short at %s /\n" % reduce(lambda x,y:x+" "+y, ["%i"%(x*nexamples+0.5) for x in proportions])) |
|---|
| 2074 | if numberedx: |
|---|
| 2075 | file.write(" long numbered at %s /\n" % reduce(lambda x,y:x+y, ["%i " % int(x+0.5) for x in numberedx])) |
|---|
| 2076 | file.write(" /\n") |
|---|
| 2077 | file.write(" \\axis left invisible\n")# label {classification accuracy}\n") |
|---|
| 2078 | file.write(" shiftedto y=%5.3f\n" % yshift) |
|---|
| 2079 | file.write(" ticks short from 0.0 to 1.0 by 0.05\n") |
|---|
| 2080 | file.write(" long numbered from 0.0 to 1.0 by 0.25\n") |
|---|
| 2081 | file.write(" /\n") |
|---|
| 2082 | if options.has_key("default"): |
|---|
| 2083 | file.write(" \\setdashpattern<1pt, 1pt>\n") |
|---|
| 2084 | file.write(" \\plot %5.3f %5.3f %5.3f %5.3f /\n" % (0., options["default"], ntestexamples, options["default"])) |
|---|
| 2085 | |
|---|
| 2086 | for i in range(len(CAs[0])): |
|---|
| 2087 | coordinates=reduce(lambda x,y:x+" "+y, ["%i %5.3f" % (proportions[p]*nexamples, CAs[p][i][0]) for p in range(len(proportions))]) |
|---|
| 2088 | if linetypes: |
|---|
| 2089 | file.write(" %s\n" % linetypes[i]) |
|---|
| 2090 | file.write(" \\plot %s /\n" % coordinates) |
|---|
| 2091 | if pointtypes: |
|---|
| 2092 | file.write(" \\multiput %s at %s /\n" % (pointtypes[i], coordinates)) |
|---|
| 2093 | |
|---|
| 2094 | file.write(" \\endpicture\n") |
|---|
| 2095 | file.write("}\n") |
|---|
| 2096 | if fopened: |
|---|
| 2097 | file.close() |
|---|
| 2098 | file.close() |
|---|
| 2099 | del file |
|---|
| 2100 | |
|---|
| 2101 | def legend_learners_to_PiCTeX(file, learners, **options): |
|---|
| 2102 | return apply(legend_to_PiCTeX, (file, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))]), options) |
|---|
| 2103 | |
|---|
| 2104 | def legend_to_PiCTeX(file, legend, **options): |
|---|
| 2105 | import types |
|---|
| 2106 | fopened=0 |
|---|
| 2107 | if type(file)==types.StringType: |
|---|
| 2108 | file=open(file, "wt") |
|---|
| 2109 | fopened=1 |
|---|
| 2110 | |
|---|
| 2111 | pointtypes=options.get("pointtypes", default_point_types) |
|---|
| 2112 | linetypes=options.get("linetypes", default_line_types) |
|---|
| 2113 | |
|---|
| 2114 | file.write("\\mbox{\n") |
|---|
| 2115 | file.write(" \\beginpicture\n") |
|---|
| 2116 | file.write(" \\setcoordinatesystem units <5cm, 1pt>\n\n") |
|---|
| 2117 | file.write(" \\setplotarea x from 0.000 to %5.3f, y from 0 to 12\n" % len(legend)) |
|---|
| 2118 | |
|---|
| 2119 | for i in range(len(legend)): |
|---|
| 2120 | if linetypes: |
|---|
| 2121 | file.write(" %s\n" % linetypes[i]) |
|---|
| 2122 | file.write(" \\plot %5.3f 6 %5.3f 6 /\n" % (i, i+0.2)) |
|---|
| 2123 | if pointtypes: |
|---|
| 2124 | file.write(" \\put {%s} at %5.3f 6\n" % (pointtypes[i], i+0.1)) |
|---|
| 2125 | file.write(" \\put {%s} [lb] at %5.3f 0\n" % (legend[i], i+0.25)) |
|---|
| 2126 | |
|---|
| 2127 | file.write(" \\endpicture\n") |
|---|
| 2128 | file.write("}\n") |
|---|
| 2129 | if fopened: |
|---|
| 2130 | file.close() |
|---|
| 2131 | file.close() |
|---|
| 2132 | del file |
|---|
| 2133 | |
|---|
| 2134 | |
|---|
| 2135 | def compute_friedman(avranks, N): |
|---|
| 2136 | """ Returns a tuple composed of (friedman statistic, degrees of freedom) |
|---|
| 2137 | and (Iman statistic - F-distribution, degrees of freedoma) given average |
|---|
| 2138 | ranks and a number of tested data sets N. |
|---|
| 2139 | """ |
|---|
| 2140 | |
|---|
| 2141 | k = len(avranks) |
|---|
| 2142 | |
|---|
| 2143 | def friedman(N, k, ranks): |
|---|
| 2144 | return 12*N*(sum([rank**2.0 for rank in ranks]) - (k*(k+1)*(k+1)/4.0) )/(k*(k+1)) |
|---|
| 2145 | |
|---|
| 2146 | def iman(fried, N, k): |
|---|
| 2147 | return (N-1)*fried/(N*(k-1) - fried) |
|---|
| 2148 | |
|---|
| 2149 | f = friedman(N, k, avranks) |
|---|
| 2150 | im = iman(f, N, k) |
|---|
| 2151 | fdistdof = (k-1, (k-1)*(N-1)) |
|---|
| 2152 | |
|---|
| 2153 | return (f, k-1), (im, fdistdof) |
|---|
| 2154 | |
|---|
| 2155 | def compute_CD(avranks, N, alpha="0.05", type="nemenyi"): |
|---|
| 2156 | """ Returns critical difference for Nemenyi or Bonferroni-Dunn test |
|---|
| 2157 | according to given alpha (either alpha="0.05" or alpha="0.1") for average |
|---|
| 2158 | ranks and number of tested data sets N. Type can be either "nemenyi" for |
|---|
| 2159 | for Nemenyi two tailed test or "bonferroni-dunn" for Bonferroni-Dunn test. |
|---|
| 2160 | """ |
|---|
| 2161 | |
|---|
| 2162 | k = len(avranks) |
|---|
| 2163 | |
|---|
| 2164 | d = {("nemenyi", "0.05"): [0, 0, 1.959964, 2.343701, 2.569032, 2.727774, |
|---|
| 2165 | 2.849705, 2.94832, 3.030879, 3.101730, 3.163684, |
|---|
| 2166 | 3.218654, 3.268004, 3.312739, 3.353618, 3.39123, |
|---|
| 2167 | 3.426041, 3.458425, 3.488685, 3.517073, 3.543799] |
|---|
| 2168 | , ("nemenyi", "0.1"): [0, 0, 1.644854, 2.052293, 2.291341, 2.459516, |
|---|
| 2169 | 2.588521, 2.692732, 2.779884, 2.854606, 2.919889, |
|---|
| 2170 | 2.977768, 3.029694, 3.076733, 3.119693, 3.159199, |
|---|
| 2171 | 3.195743, 3.229723, 3.261461, 3.291224, 3.319233] |
|---|
| 2172 | , ("bonferroni-dunn", "0.05"): [0, 0, 1.960, 2.241, 2.394, 2.498, 2.576, |
|---|
| 2173 | 2.638, 2.690, 2.724, 2.773], |
|---|
| 2174 | ("bonferroni-dunn", "0.1"): [0, 0, 1.645, 1.960, 2.128, 2.241, 2.326, |
|---|
| 2175 | 2.394, 2.450, 2.498, 2.539]} |
|---|
| 2176 | |
|---|
| 2177 | #can be computed in R as qtukey(0.95, n, Inf)**0.5 |
|---|
| 2178 | #for (x in c(2:20)) print(qtukey(0.95, x, Inf)/(2**0.5) |
|---|
| 2179 | |
|---|
| 2180 | q = d[(type, alpha)] |
|---|
| 2181 | |
|---|
| 2182 | cd = q[k]*(k*(k+1)/(6.0*N))**0.5 |
|---|
| 2183 | |
|---|
| 2184 | return cd |
|---|
| 2185 | |
|---|
| 2186 | |
|---|
| 2187 | def graph_ranks(filename, avranks, names, cd=None, cdmethod=None, lowv=None, highv=None, width=6, textspace=1, reverse=False, **kwargs): |
|---|
| 2188 | """ |
|---|
| 2189 | Draws a CD graph, which is used to display the differences in methods' |
|---|
| 2190 | performance. |
|---|
| 2191 | See Janez Demsar, Statistical Comparisons of Classifiers over |
|---|
| 2192 | Multiple Data Sets, 7(Jan):1--30, 2006. |
|---|
| 2193 | |
|---|
| 2194 | Needs matplotlib to work. |
|---|
| 2195 | |
|---|
| 2196 | :param filename: Output file name (with extension). Formats supported |
|---|
| 2197 | by matplotlib can be used. |
|---|
| 2198 | :param avranks: List of average methods' ranks. |
|---|
| 2199 | :param names: List of methods' names. |
|---|
| 2200 | |
|---|
| 2201 | :param cd: Critical difference. Used for marking methods that whose |
|---|
| 2202 | difference is not statistically significant. |
|---|
| 2203 | :param lowv: The lowest shown rank, if None, use 1. |
|---|
| 2204 | :param highv: The highest shown rank, if None, use len(avranks). |
|---|
| 2205 | :param width: Width of the drawn figure in inches, default 6 in. |
|---|
| 2206 | :param textspace: Space on figure sides left for the description |
|---|
| 2207 | of methods, default 1 in. |
|---|
| 2208 | :param reverse: If True, the lowest rank is on the right. Default\: False. |
|---|
| 2209 | :param cdmethod: None by default. It can be an index of element in avranks |
|---|
| 2210 | or or names which specifies the method which should be |
|---|
| 2211 | marked with an interval. |
|---|
| 2212 | """ |
|---|
| 2213 | if not HAS_MATPLOTLIB: |
|---|
| 2214 | import sys |
|---|
| 2215 | print >> sys.stderr, "Function requires matplotlib. Please install it." |
|---|
| 2216 | return |
|---|
| 2217 | |
|---|
| 2218 | width = float(width) |
|---|
| 2219 | textspace = float(textspace) |
|---|
| 2220 | |
|---|
| 2221 | def nth(l,n): |
|---|
| 2222 | """ |
|---|
| 2223 | Returns only nth elemnt in a list. |
|---|
| 2224 | """ |
|---|
| 2225 | n = lloc(l,n) |
|---|
| 2226 | return [ a[n] for a in l ] |
|---|
| 2227 | |
|---|
| 2228 | def lloc(l,n): |
|---|
| 2229 | """ |
|---|
| 2230 | List location in list of list structure. |
|---|
| 2231 | Enable the use of negative locations: |
|---|
| 2232 | -1 is the last element, -2 second last... |
|---|
| 2233 | """ |
|---|
| 2234 | if n < 0: |
|---|
| 2235 | return len(l[0])+n |
|---|
| 2236 | else: |
|---|
| 2237 | return n |
|---|
| 2238 | |
|---|
| 2239 | def mxrange(lr): |
|---|
| 2240 | """ |
|---|
| 2241 | Multiple xranges. Can be used to traverse matrices. |
|---|
| 2242 | This function is very slow due to unknown number of |
|---|
| 2243 | parameters. |
|---|
| 2244 | |
|---|
| 2245 | >>> mxrange([3,5]) |
|---|
| 2246 | [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)] |
|---|
| 2247 | |
|---|
| 2248 | >>> mxrange([[3,5,1],[9,0,-3]]) |
|---|
| 2249 | [(3, 9), (3, 6), (3, 3), (4, 9), (4, 6), (4, 3)] |
|---|
| 2250 | |
|---|
| 2251 | """ |
|---|
| 2252 | if not len(lr): |
|---|
| 2253 | yield () |
|---|
| 2254 | else: |
|---|
| 2255 | #it can work with single numbers |
|---|
| 2256 | index = lr[0] |
|---|
| 2257 | if type(1) == type(index): |
|---|
| 2258 | index = [ index ] |
|---|
| 2259 | for a in range(*index): |
|---|
| 2260 | for b in mxrange(lr[1:]): |
|---|
| 2261 | yield tuple([a] + list(b)) |
|---|
| 2262 | |
|---|
| 2263 | |
|---|
| 2264 | from matplotlib.figure import Figure |
|---|
| 2265 | from matplotlib.backends.backend_agg import FigureCanvasAgg |
|---|
| 2266 | |
|---|
| 2267 | def print_figure(fig, *args, **kwargs): |
|---|
| 2268 | canvas = FigureCanvasAgg(fig) |
|---|
| 2269 | canvas.print_figure(*args, **kwargs) |
|---|
| 2270 | |
|---|
| 2271 | sums = avranks |
|---|
| 2272 | |
|---|
| 2273 | tempsort = sorted([ (a,i) for i,a in enumerate(sums) ], reverse=reverse) |
|---|
| 2274 | ssums = nth(tempsort, 0) |
|---|
| 2275 | sortidx = nth(tempsort, 1) |
|---|
| 2276 | nnames = [ names[x] for x in sortidx ] |
|---|
| 2277 | |
|---|
| 2278 | if lowv is None: |
|---|
| 2279 | lowv = min(1, int(math.floor(min(ssums)))) |
|---|
| 2280 | if highv is None: |
|---|
| 2281 | highv = max(len(avranks), int(math.ceil(max(ssums)))) |
|---|
| 2282 | |
|---|
| 2283 | cline = 0.4 |
|---|
| 2284 | |
|---|
| 2285 | k = len(sums) |
|---|
| 2286 | |
|---|
| 2287 | lines = None |
|---|
| 2288 | |
|---|
| 2289 | linesblank = 0 |
|---|
| 2290 | scalewidth = width - 2*textspace |
|---|
| 2291 | |
|---|
| 2292 | def rankpos(rank): |
|---|
| 2293 | if not reverse: |
|---|
| 2294 | a = rank - lowv |
|---|
| 2295 | else: |
|---|
| 2296 | a = highv - rank |
|---|
| 2297 | return textspace+scalewidth/(highv-lowv)*a |
|---|
| 2298 | |
|---|
| 2299 | distanceh = 0.25 |
|---|
| 2300 | |
|---|
| 2301 | if cd and cdmethod is None: |
|---|
| 2302 | |
|---|
| 2303 | #get pairs of non significant methods |
|---|
| 2304 | |
|---|
| 2305 | def get_lines(sums, hsd): |
|---|
| 2306 | |
|---|
| 2307 | #get all pairs |
|---|
| 2308 | lsums = len(sums) |
|---|
| 2309 | allpairs = [ (i,j) for i,j in mxrange([[lsums], [lsums]]) if j > i ] |
|---|
| 2310 | #remove not significant |
|---|
| 2311 | notSig = [ (i,j) for i,j in allpairs if abs(sums[i]-sums[j]) <= hsd ] |
|---|
| 2312 | #keep only longest |
|---|
| 2313 | |
|---|
| 2314 | def no_longer((i,j), notSig): |
|---|
| 2315 | for i1,j1 in notSig: |
|---|
| 2316 | if (i1 <= i and j1 > j) or (i1 < i and j1 >= j): |
|---|
| 2317 | return False |
|---|
| 2318 | return True |
|---|
| 2319 | |
|---|
| 2320 | longest = [ (i,j) for i,j in notSig if no_longer((i,j),notSig) ] |
|---|
| 2321 | |
|---|
| 2322 | return longest |
|---|
| 2323 | |
|---|
| 2324 | lines = get_lines(ssums, cd) |
|---|
| 2325 | linesblank = 0.2 + 0.2 + (len(lines)-1)*0.1 |
|---|
| 2326 | |
|---|
| 2327 | #add scale |
|---|
| 2328 | distanceh = 0.25 |
|---|
| 2329 | cline += distanceh |
|---|
| 2330 | |
|---|
| 2331 | #calculate height needed height of an image |
|---|
| 2332 | minnotsignificant = max(2*0.2, linesblank) |
|---|
| 2333 | height = cline + ((k+1)/2)*0.2 + minnotsignificant |
|---|
| 2334 | |
|---|
| 2335 | fig = Figure(figsize=(width, height)) |
|---|
| 2336 | ax = fig.add_axes([0,0,1,1]) #reverse y axis |
|---|
| 2337 | ax.set_axis_off() |
|---|
| 2338 | |
|---|
| 2339 | hf = 1./height # height factor |
|---|
| 2340 | wf = 1./width |
|---|
| 2341 | |
|---|
| 2342 | def hfl(l): |
|---|
| 2343 | return [ a*hf for a in l ] |
|---|
| 2344 | |
|---|
| 2345 | def wfl(l): |
|---|
| 2346 | return [ a*wf for a in l ] |
|---|
| 2347 | |
|---|
| 2348 | |
|---|
| 2349 | # Upper left corner is (0,0). |
|---|
| 2350 | |
|---|
| 2351 | ax.plot([0,1], [0,1], c="w") |
|---|
| 2352 | ax.set_xlim(0, 1) |
|---|
| 2353 | ax.set_ylim(1, 0) |
|---|
| 2354 | |
|---|
| 2355 | def line(l, color='k', **kwargs): |
|---|
| 2356 | """ |
|---|
| 2357 | Input is a list of pairs of points. |
|---|
| 2358 | """ |
|---|
| 2359 | ax.plot(wfl(nth(l,0)), hfl(nth(l,1)), color=color, **kwargs) |
|---|
| 2360 | |
|---|
| 2361 | def text(x, y, s, *args, **kwargs): |
|---|
| 2362 | ax.text(wf*x, hf*y, s, *args, **kwargs) |
|---|
| 2363 | |
|---|
| 2364 | line([(textspace, cline), (width-textspace, cline)], linewidth=0.7) |
|---|
| 2365 | |
|---|
| 2366 | bigtick = 0.1 |
|---|
| 2367 | smalltick = 0.05 |
|---|
| 2368 | |
|---|
| 2369 | |
|---|
| 2370 | import numpy |
|---|
| 2371 | tick = None |
|---|
| 2372 | for a in list(numpy.arange(lowv, highv, 0.5)) + [highv]: |
|---|
| 2373 | tick = smalltick |
|---|
| 2374 | if a == int(a): tick = bigtick |
|---|
| 2375 | line([(rankpos(a), cline-tick/2),(rankpos(a), cline)], linewidth=0.7) |
|---|
| 2376 | |
|---|
| 2377 | for a in range(lowv, highv+1): |
|---|
| 2378 | text(rankpos(a), cline-tick/2-0.05, str(a), ha="center", va="bottom") |
|---|
| 2379 | |
|---|
| 2380 | k = len(ssums) |
|---|
| 2381 | |
|---|
| 2382 | for i in range((k+1)/2): |
|---|
| 2383 | chei = cline+ minnotsignificant + i *0.2 |
|---|
| 2384 | line([(rankpos(ssums[i]), cline), (rankpos(ssums[i]), chei), (textspace-0.1, chei)], linewidth=0.7) |
|---|
| 2385 | text(textspace-0.2, chei, nnames[i], ha="right", va="center") |
|---|
| 2386 | |
|---|
| 2387 | for i in range((k+1)/2, k): |
|---|
| 2388 | chei = cline + minnotsignificant + (k-i-1)*0.2 |
|---|
| 2389 | line([(rankpos(ssums[i]), cline), (rankpos(ssums[i]), chei), (textspace+scalewidth+0.1, chei)], linewidth=0.7) |
|---|
| 2390 | text(textspace+scalewidth+0.2, chei, nnames[i], ha="left", va="center") |
|---|
| 2391 | |
|---|
| 2392 | if cd and cdmethod is None: |
|---|
| 2393 | |
|---|
| 2394 | #upper scale |
|---|
| 2395 | if not reverse: |
|---|
| 2396 | begin, end = rankpos(lowv), rankpos(lowv+cd) |
|---|
| 2397 | else: |
|---|
| 2398 | begin, end = rankpos(highv), rankpos(highv - cd) |
|---|
| 2399 | |
|---|
| 2400 | line([(begin, distanceh), (end, distanceh)], linewidth=0.7) |
|---|
| 2401 | line([(begin, distanceh + bigtick/2), (begin, distanceh - bigtick/2)], linewidth=0.7) |
|---|
| 2402 | line([(end, distanceh + bigtick/2), (end, distanceh - bigtick/2)], linewidth=0.7) |
|---|
| 2403 | text((begin+end)/2, distanceh - 0.05, "CD", ha="center", va="bottom") |
|---|
| 2404 | |
|---|
| 2405 | #non significance lines |
|---|
| 2406 | def draw_lines(lines, side=0.05, height=0.1): |
|---|
| 2407 | start = cline + 0.2 |
|---|
| 2408 | for l,r in lines: |
|---|
| 2409 | line([(rankpos(ssums[l])-side, start), (rankpos(ssums[r])+side, start)], linewidth=2.5) |
|---|
| 2410 | start += height |
|---|
| 2411 | |
|---|
| 2412 | draw_lines(lines) |
|---|
| 2413 | |
|---|
| 2414 | elif cd: |
|---|
| 2415 | begin = rankpos(avranks[cdmethod]-cd) |
|---|
| 2416 | end = rankpos(avranks[cdmethod]+cd) |
|---|
| 2417 | line([(begin, cline), (end, cline)], linewidth=2.5) |
|---|
| 2418 | line([(begin, cline + bigtick/2), (begin, cline - bigtick/2)], linewidth=2.5) |
|---|
| 2419 | line([(end, cline + bigtick/2), (end, cline - bigtick/2)], linewidth=2.5) |
|---|
| 2420 | |
|---|
| 2421 | print_figure(fig, filename, **kwargs) |
|---|
| 2422 | |
|---|
| 2423 | def mlc_hamming_loss(res): |
|---|
| 2424 | """ |
|---|
| 2425 | Schapire and Singer (2000) presented Hamming Loss, which id defined as: |
|---|
| 2426 | |
|---|
| 2427 | :math:`HammingLoss(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{Y_i \\vartriangle Z_i}{|L|}` |
|---|
| 2428 | """ |
|---|
| 2429 | losses = [0]*res.number_of_learners |
|---|
| 2430 | label_num = len(res.labels) |
|---|
| 2431 | example_num = gettotsize(res) |
|---|
| 2432 | |
|---|
| 2433 | for e in res.results: |
|---|
| 2434 | aclass = e.actual_class |
|---|
| 2435 | for i, labels in enumerate(e.classes): |
|---|
| 2436 | labels = map(int, labels) |
|---|
| 2437 | if len(labels) <> len(aclass): |
|---|
| 2438 | raise ValueError, "The dimensions of the classified output and the actual class array do not match." |
|---|
| 2439 | for j in range(label_num): |
|---|
| 2440 | if labels[j] != aclass[j]: |
|---|
| 2441 | losses[i] += 1 |
|---|
| 2442 | |
|---|
| 2443 | return [float(x)/(label_num*example_num) for x in losses] |
|---|
| 2444 | |
|---|
| 2445 | def mlc_accuracy(res, forgiveness_rate = 1.0): |
|---|
| 2446 | """ |
|---|
| 2447 | Godbole & Sarawagi, 2004 uses the metrics accuracy, precision, recall as follows: |
|---|
| 2448 | |
|---|
| 2449 | :math:`Accuracy(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Y_i \\cup Z_i|}` |
|---|
| 2450 | |
|---|
| 2451 | Boutell et al. (2004) give a more generalized version using a parameter :math:`\\alpha \\ge 0`, |
|---|
| 2452 | called forgiveness rate: |
|---|
| 2453 | |
|---|
| 2454 | :math:`Accuracy(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} (\\frac{|Y_i \\cap Z_i|}{|Y_i \\cup Z_i|})^{\\alpha}` |
|---|
| 2455 | """ |
|---|
| 2456 | accuracies = [0.0]*res.number_of_learners |
|---|
| 2457 | example_num = gettotsize(res) |
|---|
| 2458 | |
|---|
| 2459 | for e in res.results: |
|---|
| 2460 | aclass = e.actual_class |
|---|
| 2461 | for i, labels in enumerate(e.classes): |
|---|
| 2462 | labels = map(int, labels) |
|---|
| 2463 | if len(labels) <> len(aclass): |
|---|
| 2464 | raise ValueError, "The dimensions of the classified output and the actual class array do not match." |
|---|
| 2465 | |
|---|
| 2466 | intersection = 0.0 |
|---|
| 2467 | union = 0.0 |
|---|
| 2468 | for real, pred in zip(labels, aclass): |
|---|
| 2469 | if real and pred: |
|---|
| 2470 | intersection += 1 |
|---|
| 2471 | if real or pred: |
|---|
| 2472 | union += 1 |
|---|
| 2473 | |
|---|
| 2474 | if union: |
|---|
| 2475 | accuracies[i] += intersection / union |
|---|
| 2476 | |
|---|
| 2477 | return [math.pow(x/example_num,forgiveness_rate) for x in accuracies] |
|---|
| 2478 | |
|---|
| 2479 | def mlc_precision(res): |
|---|
| 2480 | """ |
|---|
| 2481 | :math:`Precision(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Z_i|}` |
|---|
| 2482 | """ |
|---|
| 2483 | precisions = [0.0]*res.number_of_learners |
|---|
| 2484 | example_num = gettotsize(res) |
|---|
| 2485 | |
|---|
| 2486 | for e in res.results: |
|---|
| 2487 | aclass = e.actual_class |
|---|
| 2488 | for i, labels in enumerate(e.classes): |
|---|
| 2489 | labels = map(int, labels) |
|---|
| 2490 | if len(labels) <> len(aclass): |
|---|
| 2491 | raise ValueError, "The dimensions of the classified output and the actual class array do not match." |
|---|
| 2492 | |
|---|
| 2493 | intersection = 0.0 |
|---|
| 2494 | predicted = 0.0 |
|---|
| 2495 | for real, pred in zip(labels, aclass): |
|---|
| 2496 | if real and pred: |
|---|
| 2497 | intersection += 1 |
|---|
| 2498 | if real: |
|---|
| 2499 | predicted += 1 |
|---|
| 2500 | if predicted: |
|---|
| 2501 | precisions[i] += intersection / predicted |
|---|
| 2502 | |
|---|
| 2503 | return [x/example_num for x in precisions] |
|---|
| 2504 | |
|---|
| 2505 | def mlc_recall(res): |
|---|
| 2506 | """ |
|---|
| 2507 | :math:`Recall(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Y_i|}` |
|---|
| 2508 | """ |
|---|
| 2509 | recalls = [0.0]*res.number_of_learners |
|---|
| 2510 | example_num = gettotsize(res) |
|---|
| 2511 | |
|---|
| 2512 | for e in res.results: |
|---|
| 2513 | aclass = e.actual_class |
|---|
| 2514 | for i, labels in enumerate(e.classes): |
|---|
| 2515 | labels = map(int, labels) |
|---|
| 2516 | if len(labels) <> len(aclass): |
|---|
| 2517 | raise ValueError, "The dimensions of the classified output and the actual class array do not match." |
|---|
| 2518 | |
|---|
| 2519 | intersection = 0.0 |
|---|
| 2520 | actual = 0.0 |
|---|
| 2521 | for real, pred in zip(labels, aclass): |
|---|
| 2522 | if real and pred: |
|---|
| 2523 | intersection += 1 |
|---|
| 2524 | if pred: |
|---|
| 2525 | actual += 1 |
|---|
| 2526 | if actual: |
|---|
| 2527 | recalls[i] += intersection / actual |
|---|
| 2528 | |
|---|
| 2529 | return [x/example_num for x in recalls] |
|---|
| 2530 | |
|---|
| 2531 | #def mlc_ranking_loss(res): |
|---|
| 2532 | # pass |
|---|
| 2533 | # |
|---|
| 2534 | #def mlc_average_precision(res): |
|---|
| 2535 | # pass |
|---|
| 2536 | # |
|---|
| 2537 | #def mlc_hierarchical_loss(res): |
|---|
| 2538 | # pass |
|---|
| 2539 | |
|---|
| 2540 | |
|---|
| 2541 | def mt_average_score(res, score, weights=None): |
|---|
| 2542 | """ |
|---|
| 2543 | Compute individual scores for each target and return the (weighted) average. |
|---|
| 2544 | |
|---|
| 2545 | One method can be used to compute scores for all targets or a list of |
|---|
| 2546 | scoring methods can be passed to use different methods for different |
|---|
| 2547 | targets. In the latter case, care has to be taken if the ranges of scoring |
|---|
| 2548 | methods differ. |
|---|
| 2549 | For example, when the first target is scored from -1 to 1 (1 best) and the |
|---|
| 2550 | second from 0 to 1 (0 best), using `weights=[0.5,-1]` would scale both |
|---|
| 2551 | to a span of 1, and invert the second so that higher scores are better. |
|---|
| 2552 | |
|---|
| 2553 | :param score: Single-target scoring method or a list of such methods |
|---|
| 2554 | (one for each target). |
|---|
| 2555 | :param weights: List of real weights, one for each target, |
|---|
| 2556 | for a weighted average. |
|---|
| 2557 | |
|---|
| 2558 | """ |
|---|
| 2559 | if not len(res.results): |
|---|
| 2560 | raise ValueError, "Cannot compute the score: no examples." |
|---|
| 2561 | if res.number_of_learners < 1: |
|---|
| 2562 | return [] |
|---|
| 2563 | n_classes = len(res.results[0].actual_class) |
|---|
| 2564 | if weights is None: |
|---|
| 2565 | weights = [1.] * n_classes |
|---|
| 2566 | if not hasattr(score, '__len__'): |
|---|
| 2567 | score = [score] * n_classes |
|---|
| 2568 | elif len(score) != n_classes: |
|---|
| 2569 | raise ValueError, "Number of scoring methods and targets do not match." |
|---|
| 2570 | # save original classes |
|---|
| 2571 | clsss = [te.classes for te in res.results] |
|---|
| 2572 | aclsss = [te.actual_class for te in res.results] |
|---|
| 2573 | # compute single target scores |
|---|
| 2574 | single_scores = [] |
|---|
| 2575 | for i in range(n_classes): |
|---|
| 2576 | for te, clss, aclss in zip(res.results, clsss, aclsss): |
|---|
| 2577 | te.classes = [cls[i] for cls in clss] |
|---|
| 2578 | te.actual_class = aclss[i] |
|---|
| 2579 | single_scores.append(score[i](res)) |
|---|
| 2580 | # restore original classes |
|---|
| 2581 | for te, clss, aclss in zip(res.results, clsss, aclsss): |
|---|
| 2582 | te.classes = clss |
|---|
| 2583 | te.actual_class = aclss |
|---|
| 2584 | return [sum(w * s for w, s in zip(weights, scores)) / sum(weights) |
|---|
| 2585 | for scores in zip(*single_scores)] |
|---|
| 2586 | |
|---|
| 2587 | def mt_flattened_score(res, score): |
|---|
| 2588 | """ |
|---|
| 2589 | Flatten (concatenate into a single list) the predictions of multiple |
|---|
| 2590 | targets and compute a single-target score. |
|---|
| 2591 | |
|---|
| 2592 | :param score: Single-target scoring method. |
|---|
| 2593 | """ |
|---|
| 2594 | res2 = Orange.evaluation.testing.ExperimentResults(res.number_of_iterations, |
|---|
| 2595 | res.classifier_names, class_values=res.class_values, |
|---|
| 2596 | weights=res.weights, classifiers=res.classifiers, loaded=res.loaded, |
|---|
| 2597 | test_type=Orange.evaluation.testing.TEST_TYPE_SINGLE, labels=res.labels) |
|---|
| 2598 | for te in res.results: |
|---|
| 2599 | for i, ac in enumerate(te.actual_class): |
|---|
| 2600 | te2 = Orange.evaluation.testing.TestedExample( |
|---|
| 2601 | iteration_number=te.iteration_number, actual_class=ac) |
|---|
| 2602 | for c, p in zip(te.classes, te.probabilities): |
|---|
| 2603 | te2.add_result(c[i], p[i]) |
|---|
| 2604 | res2.results.append(te2) |
|---|
| 2605 | return score(res2) |
|---|
| 2606 | |
|---|
| 2607 | |
|---|
| 2608 | ######################################################################################### |
|---|
| 2609 | if __name__ == "__main__": |
|---|
| 2610 | avranks = [3.143, 2.000, 2.893, 1.964] |
|---|
| 2611 | names = ["prva", "druga", "tretja", "cetrta" ] |
|---|
| 2612 | cd = compute_CD(avranks, 14) |
|---|
| 2613 | #cd = compute_CD(avranks, 10, type="bonferroni-dunn") |
|---|
| 2614 | print cd |
|---|
| 2615 | |
|---|
| 2616 | print compute_friedman(avranks, 14) |
|---|
| 2617 | |
|---|
| 2618 | #graph_ranks("test.eps", avranks, names, cd=cd, cdmethod=0, width=6, textspace=1.5) |
|---|