Changeset 10633:fb05a6f3a235 in orange for Orange/evaluation/reliability.py
 Timestamp:
 03/23/12 20:20:57 (2 years ago)
 Branch:
 default
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

Orange/evaluation/reliability.py
r10393 r10633 55 55 56 56 def get_prediction_error_list(res): 57 return [result.actual Class  result.classes[0] for result in res.results]57 return [result.actual_class  result.classes[0] for result in res.results] 58 58 59 59 def get_description_list(res, i): … … 124 124 sig = [0 for _ in xrange(number_of_estimates)] 125 125 method_list = [0 for _ in xrange(number_of_estimates)] 126 126 127 127 for res in results_by_fold: 128 128 prediction_error = get_prediction_error_list(res) … … 139 139 sig[i] = signed_or_absolute 140 140 method_list[i] = method 141 141 142 142 # Calculate pvalues 143 143 results = [float(res) / number_of_folds for res in results] 144 144 ps = [p_value_from_r(r, number_of_instances) for r in results] 145 145 146 146 return zip(results, ps, sig, method_list) 147 147 … … 151 151 """ 152 152 df = n  2 153 t = r * (df / ((r + 1.0 + 1e30) * (r + 1.0 + 1e30)) )**0.5154 return statc.betai (df * 0.5, 0.5, df /(df + t*t))153 t = r * (df / ((r + 1.0 + 1e30) * (r + 1.0 + 1e30))) ** 0.5 154 return statc.betai (df * 0.5, 0.5, df / (df + t * t)) 155 155 156 156 class Estimate: … … 188 188 189 189 """ 190 def __init__(self, estimate, signed_or_absolute, method, icv_method 190 def __init__(self, estimate, signed_or_absolute, method, icv_method= 1): 191 191 self.estimate = estimate 192 192 self.signed_or_absolute = signed_or_absolute … … 202 202 self.procentage = procentage 203 203 self.estimator = estimator 204 204 205 205 def __call__(self, instances, weight=None, **kwds): 206 206 207 207 # Calculate borders using cross validation 208 208 res = Orange.evaluation.testing.cross_validation([self.estimator], instances) … … 210 210 for i in xrange(len(res.results[0].probabilities[0].reliability_estimate)): 211 211 estimates, signed_or_absolute, method = get_reliability_estimation_list(res, i) 212 sorted_estimates = sorted( 213 borders = [sorted_estimates[int(len(estimates) *p)1] for p in self.procentage]212 sorted_estimates = sorted(abs(x) for x in estimates) 213 
borders = [sorted_estimates[int(len(estimates) * p)  1] for p in self.procentage] 214 214 all_borders.append(borders) 215 215 216 216 # Learn on whole train data 217 217 estimator_classifier = self.estimator(instances) 218 218 219 219 return DescriptiveAnalysisClassifier(estimator_classifier, all_borders, self.desc) 220 220 … … 224 224 self.all_borders = all_borders 225 225 self.desc = desc 226 226 227 227 def __call__(self, instance, result_type=Orange.core.GetValue): 228 228 predicted, probabilities = self.estimator_classifier(instance, Orange.core.GetBoth) 229 229 230 230 for borders, estimate in zip(self.all_borders, probabilities.reliability_estimate): 231 231 estimate.text_description = self.desc[0] … … 233 233 if estimate.estimate >= lower_border: 234 234 estimate.text_description = text_desc 235 235 236 236 # Return the appropriate type of result 237 237 if result_type == Orange.core.GetValue: … … 269 269 def __init__(self, e=[0.01, 0.1, 0.5, 1.0, 2.0]): 270 270 self.e = e 271 271 272 272 def __call__(self, instances, learner): 273 273 min_value = max_value = instances[0].getclass().value … … 278 278 min_value = ex.getclass().value 279 279 return SensitivityAnalysisClassifier(self.e, instances, min_value, max_value, learner) 280 280 281 281 class SensitivityAnalysisClassifier: 282 282 def __init__(self, e, instances, min_value, max_value, learner): … … 286 286 self.min_value = min_value 287 287 self.learner = learner 288 288 289 289 def __call__(self, instance, predicted, probabilities): 290 290 # Create new dataset 291 291 r_data = Orange.data.Table(self.instances) 292 292 293 293 # Create new instance 294 294 modified_instance = Orange.data.Instance(instance) 295 295 296 296 # Append it to the data 297 297 r_data.append(modified_instance) 298 298 299 299 # Calculate SAvar & SAbias 300 300 SAvar = SAbias = 0 301 301 302 302 for eps in self.e: 303 303 # +epsilon 304 r_data[1].setclass(predicted.value + eps *(self.max_value  self.min_value))304 
r_data[-1].setclass(predicted.value + eps * (self.max_value - self.min_value)) 305 305 c = self.learner(r_data) 306 306 k_plus = c(instance, Orange.core.GetValue) 307 307 308 308 # -epsilon 309 r_data[-1].setclass(predicted.value - eps *(self.max_value - self.min_value)) 309 r_data[-1].setclass(predicted.value - eps * (self.max_value - self.min_value)) 310 310 c = self.learner(r_data) 311 311 k_minus = c(instance, Orange.core.GetValue) … … 316 316 # calculate part SAvar and SAbias 317 317 SAvar += k_plus.value - k_minus.value 318 SAbias += k_plus.value + k_minus.value - 2 *predicted.value 319 318 SAbias += k_plus.value + k_minus.value - 2 * predicted.value 319 320 320 SAvar /= len(self.e) 321 SAbias /= 2 *len(self.e) 322 321 SAbias /= 2 * len(self.e) 322 323 323 return [Estimate(SAvar, ABSOLUTE, SAVAR_ABSOLUTE), 324 324 Estimate(SAbias, SIGNED, SABIAS_SIGNED), 325 325 Estimate(abs(SAbias), ABSOLUTE, SABIAS_ABSOLUTE)] 326 326 327 327 class BaggingVariance: 328 328 """ … … 345 345 def __init__(self, m=50): 346 346 self.m = m 347 347 348 348 def __call__(self, instances, learner): 349 349 classifiers = [] 350 350 351 351 # Create bagged classifiers using sampling with replacement 352 352 for _ in xrange(self.m): … … 359 359 def __init__(self, classifiers): 360 360 self.classifiers = classifiers 361 361 362 362 def __call__(self, instance, *args): 363 363 BAGV = 0 364 364 365 365 # Calculate the bagging variance 366 366 bagged_values = [c(instance, Orange.core.GetValue).value for c in self.classifiers if c is not None] 367 367 368 368 k = sum(bagged_values) / len(bagged_values) 369 370 BAGV = sum( (bagged_value - k)**2 for bagged_value in bagged_values) / len(bagged_values) 371 369 370 BAGV = sum((bagged_value - k) ** 2 for bagged_value in bagged_values) / len(bagged_values) 371 372 372 return [Estimate(BAGV, ABSOLUTE, BAGV_ABSOLUTE)] 373 373 374 374 class LocalCrossValidation: 375 375 """ … … 397 397 def __init__(self, k=0): 398 398 self.k = k 399 399 400 400 def __call__(self, 
instances, learner): 401 401 nearest_neighbours_constructor = Orange.classification.knn.FindNearestConstructor() 402 402 nearest_neighbours_constructor.distanceConstructor = Orange.distance.Euclidean() 403 403 404 404 distance_id = Orange.feature.Descriptor.new_meta_id() 405 405 nearest_neighbours = nearest_neighbours_constructor(instances, 0, distance_id) 406 406 407 407 if self.k == 0: 408 self.k = max(5, len(instances) /20)409 408 self.k = max(5, len(instances) / 20) 409 410 410 return LocalCrossValidationClassifier(distance_id, nearest_neighbours, self.k, learner) 411 411 … … 416 416 self.k = k 417 417 self.learner = learner 418 418 419 419 def __call__(self, instance, *args): 420 420 LCVer = 0 421 421 LCVdi = 0 422 422 423 423 # Find k nearest neighbors 424 424 425 425 knn = [ex for ex in self.nearest_neighbours(instance, self.k)] 426 426 427 427 # leave one out of prediction error 428 428 for i in xrange(len(knn)): 429 429 train = knn[:] 430 430 del train[i] 431 431 432 432 classifier = self.learner(Orange.data.Table(train)) 433 433 434 434 returned_value = classifier(knn[i], Orange.core.GetValue) 435 435 436 436 e = abs(knn[i].getclass().value  returned_value.value) 437 437 438 438 LCVer += e * math.exp(knn[i][self.distance_id]) 439 439 LCVdi += math.exp(knn[i][self.distance_id]) 440 440 441 441 LCV = LCVer / LCVdi if LCVdi != 0 else 0 442 442 if math.isnan(LCV): … … 464 464 def __init__(self, k=5): 465 465 self.k = k 466 466 467 467 def __call__(self, instances, learner): 468 468 nearest_neighbours_constructor = Orange.classification.knn.FindNearestConstructor() 469 469 nearest_neighbours_constructor.distanceConstructor = Orange.distance.Euclidean() 470 470 471 471 distance_id = Orange.feature.Descriptor.new_meta_id() 472 472 nearest_neighbours = nearest_neighbours_constructor(instances, 0, distance_id) … … 477 477 self.nearest_neighbours = nearest_neighbours 478 478 self.k = k 479 479 480 480 def __call__(self, instance, predicted, probabilities): 481 481 
CNK = 0 482 482 483 483 # Find k nearest neighbors 484 484 485 485 knn = [ex for ex in self.nearest_neighbours(instance, self.k)] 486 486 487 487 # average label of neighbors 488 488 for ex in knn: 489 489 CNK += ex.getclass().value 490 490 491 491 CNK /= self.k 492 492 CNK = predicted.value 493 493 494 494 return [Estimate(CNK, SIGNED, CNK_SIGNED), 495 495 Estimate(abs(CNK), ABSOLUTE, CNK_ABSOLUTE)] 496 496 497 497 class Mahalanobis: 498 498 """ … … 511 511 def __init__(self, k=3): 512 512 self.k = k 513 513 514 514 def __call__(self, instances, *args): 515 515 nnm = Orange.classification.knn.FindNearestConstructor() 516 516 nnm.distanceConstructor = Orange.distance.Mahalanobis() 517 517 518 518 mid = Orange.feature.Descriptor.new_meta_id() 519 519 nnm = nnm(instances, 0, mid) … … 525 525 self.nnm = nnm 526 526 self.mid = mid 527 527 528 528 def __call__(self, instance, *args): 529 529 mahalanobis_distance = 0 530 530 531 531 mahalanobis_distance = sum(ex[self.mid].value for ex in self.nnm(instance, self.k)) 532 532 533 533 return [ Estimate(mahalanobis_distance, ABSOLUTE, MAHAL_ABSOLUTE) ] 534 534 … … 545 545 def __init__(self): 546 546 pass 547 547 548 548 def __call__(self, instances, *args): 549 549 dc = Orange.core.DomainContinuizer() … … 551 551 dc.continuousTreatment = Orange.core.DomainContinuizer.NormalizeBySpan 552 552 dc.multinomialTreatment = Orange.core.DomainContinuizer.NValues 553 553 554 554 new_domain = dc(instances) 555 555 new_instances = instances.translate(new_domain) 556 556 557 557 X, _, _ = new_instances.to_numpy() 558 558 instance_avg = numpy.average(X, 0) 559 559 560 560 distance_constructor = Orange.distance.Mahalanobis() 561 561 distance = distance_constructor(new_instances) 562 562 563 563 average_instance = Orange.data.Instance(new_instances.domain, list(instance_avg) + ["?"]) 564 564 565 565 return MahalanobisToCenterClassifier(distance, average_instance, new_domain) 566 566 … … 570 570 self.average_instance = average_instance 571 
571 self.new_domain = new_domain 572 572 573 573 def __call__(self, instance, *args): 574 574 575 575 inst = Orange.data.Instance(self.new_domain, instance) 576 576 577 577 mahalanobis_to_center = self.distance(inst, self.average_instance) 578 578 579 579 return [ Estimate(mahalanobis_to_center, ABSOLUTE, MAHAL_TO_CENTER_ABSOLUTE) ] 580 580 … … 598 598 self.bagv = bagv 599 599 self.cnk = cnk 600 600 601 601 def __call__(self, instances, learner): 602 602 bagv_classifier = self.bagv(instances, learner) … … 608 608 self.bagv_classifier = bagv_classifier 609 609 self.cnk_classifier = cnk_classifier 610 610 611 611 def __call__(self, instance, predicted, probabilities): 612 612 bagv_estimates = self.bagv_classifier(instance, predicted, probabilities) 613 613 cnk_estimates = self.cnk_classifier(instance, predicted, probabilities) 614 615 bvck_value = (bagv_estimates[0].estimate + cnk_estimates[1].estimate) /2614 615 bvck_value = (bagv_estimates[0].estimate + cnk_estimates[1].estimate) / 2 616 616 bvck_estimates = [ Estimate(bvck_value, ABSOLUTE, BVCK_ABSOLUTE) ] 617 617 bvck_estimates.extend(bagv_estimates) … … 622 622 def __init__(self): 623 623 pass 624 624 625 625 def __call__(self, instances, learner): 626 626 res = Orange.evaluation.testing.cross_validation([learner], instances) 627 627 prediction_errors = get_prediction_error_list(res) 628 628 629 629 new_domain = Orange.data.Domain(instances.domain.attributes, Orange.core.FloatVariable("pe")) 630 630 new_dataset = Orange.data.Table(new_domain, instances) 631 631 632 632 for instance, prediction_error in izip(new_dataset, prediction_errors): 633 633 instance.set_class(prediction_error) 634 634 635 635 rf = Orange.ensemble.forest.RandomForestLearner() 636 636 rf_classifier = rf(new_dataset) 637 637 638 638 return ErrorPredictingClassification(rf_classifier, new_domain) 639 639 640 640 class ErrorPredictingClassification: 641 641 def __init__(self, rf_classifier, new_domain): 642 642 self.rf_classifier = 
rf_classifier 643 643 self.new_domain = new_domain 644 644 645 645 def __call__(self, instance, predicted, probabilities): 646 646 new_instance = Orange.data.Instance(self.new_domain, instance) 647 647 value = self.rf_classifier(new_instance, Orange.core.GetValue) 648 648 649 649 return [Estimate(value.value, SIGNED, SABIAS_SIGNED)] 650 650 … … 682 682 self.box_learner = box_learner 683 683 self.blending = False 684 685 684 685 686 686 def __call__(self, instances, weight=None, **kwds): 687 687 """Learn from the given table of data instances. … … 693 693 :rtype: :class:`Orange.evaluation.reliability.Classifier` 694 694 """ 695 695 696 696 blending_classifier = None 697 697 new_domain = None 698 698 699 699 if instances.domain.class_var.var_type != Orange.feature.Continuous.Continuous: 700 700 raise Exception("This method only works on data with continuous class.") 701 701 702 702 return Classifier(instances, self.box_learner, self.estimators, self.blending, new_domain, blending_classifier) 703 703 704 704 def internal_cross_validation(self, instances, folds=10): 705 705 """ Perform the internal cross validation for getting the best … … 720 720 sorted_results = sorted(results) 721 721 return sorted_results[1][3] 722 722 723 723 def internal_cross_validation_testing(self, instances, folds=10): 724 724 """ Perform internal cross validation (as in Automatic selection of … … 735 735 """ 736 736 cv_indices = Orange.core.MakeRandomIndicesCV(instances, folds) 737 737 738 738 list_of_rs = [] 739 739 740 740 sum_of_rs = defaultdict(float) 741 741 742 742 for fold in xrange(folds): 743 743 data = instances.select(cv_indices, fold) … … 751 751 sorted_sum_of_rs = sorted(sum_of_rs.items(), key=lambda estimate: estimate[1], reverse=True) 752 752 return sorted_sum_of_rs[0][0] 753 753 754 754 labels = ["SAvar", "SAbias", "BAGV", "CNK", "LCV", "BVCK", "Mahalanobis", "ICV"] 755 755 … … 774 774 self.blending_domain = blending_domain 775 775 self.rf_classifier = rf_classifier 776 776 
777 777 # Train the learner with original data 778 778 self.classifier = box_learner(instances) 779 779 780 780 # Train all the estimators and create their classifiers 781 781 self.estimation_classifiers = [estimator(instances, box_learner) for estimator in estimators] 782 782 783 783 def __call__(self, instance, result_type=Orange.core.GetValue): 784 784 """ … … 802 802 """ 803 803 predicted, probabilities = self.classifier(instance, Orange.core.GetBoth) 804 804 805 805 # Create a place holder for estimates 806 806 if probabilities is None: … … 809 809 # warnings.simplefilter("ignore") 810 810 probabilities.setattr('reliability_estimate', []) 811 811 812 812 # Calculate all the estimates and add them to the results 813 813 for estimate in self.estimation_classifiers: 814 814 probabilities.reliability_estimate.extend(estimate(instance, predicted, probabilities)) 815 815 816 816 # Return the appropriate type of result 817 817 if result_type == Orange.core.GetValue:
Note: See TracChangeset for help on using the changeset viewer.