Changeset 8902:b42747bac52a in orange
 Timestamp:
 09/05/11 11:07:34 (3 years ago)
 Branch:
 default
 Convert:
 bf833b41cb965d050fdb1b1b60c2d1f1d0e49d1a
 Location:
 orange
 Files:

 1 added
 1 edited
Legend:
 Unmodified
 Added
 Removed

orange/Orange/evaluation/reliability.py
r8059 r8902 18 18 regression predictions, Zoran Bosnic 2008. 19 19 20 The following example shows a basic usage of reliability estimates 20 Next example shows basic reliability estimation usage 21 (`reliability_basic.py`_, uses `housing.tab`_): 22 23 .. literalinclude:: code/reliability_basic.py 24 25 First we load our desired data table and choose on learner we want to use 26 reliability estimation on. We also want to calculate only the Mahalanobis and 27 local cross validation estimates with desired parameters. We learn our 28 estimator on data, and estimate the reliability for first instance of data table. 29 We output the estimates used and the numbers. 30 31 We can also do reliability estimation on whole data table not only on single 32 instance. Example shows us doing cross validation on the desired data table, 33 using default reliability estimates, and at the ending output reliability 34 estimates for the first instance of data table. 21 35 (`reliabilityrun.py`_, uses `housing.tab`_): 22 36 23 37 .. literalinclude:: code/reliabilityrun.py 24 38 25 Reliability estimation methods are computational y quite hard so it may take39 Reliability estimation methods are computationally quite hard so it may take 26 40 a bit of time for this script to produce a result. In the above example we 27 41 first create a learner that we're interested in, in this example 28 knearestneighbo urs, and use it inside reliability learner and do cross42 knearestneighbors, and use it inside reliability learner and do cross 29 43 validation to get the results. Now we output for the first example in the 30 data setall the reliability estimates and their names.44 data table all the reliability estimates and their names. 31 45 32 46 Reliability Methods … … 140 154 141 155 .. 
literalinclude:: code/reliabilitylong.py 142 :lines: 30-43 156 :lines: 30-42 143 157 144 158 In this part of the example we have a usual prediction problem, we have a … … 176 190 <http://journals.cambridge.org/abstract_S0269888909990154>`_ 177 191 *The Knowledge Engineering Review* 25(1), 27-47. 178 179 192 """ 180 193 import Orange … … 184 197 import math 185 198 import warnings 199 200 from collections import defaultdict 201 from itertools import izip 202 203 import Orange.regression.linear 186 204 187 205 # Labels and final variables … … 206 224 BVCK_ABSOLUTE = 7 207 225 MAHAL_ABSOLUTE = 8 226 BLENDING_ABSOLUTE = 9 208 227 ICV_METHOD = 10 209 228 … … 216 235 3: "BAGV absolute", 4: "CNK signed", 5: "CNK absolute", 217 236 6: "LCV absolute", 7: "BVCK_absolute", 8: "Mahalanobis absolute", 218 10: "ICV"} 237 9: "BLENDING absolute", 10: "ICV", 11: "RF Variance", 12: "RF Std"} 238 239 select_with_repeat = Orange.core.MakeRandomIndicesMultiple() 240 select_with_repeat.random_generator = Orange.core.RandomGenerator() 219 241 220 242 def get_reliability_estimation_list(res, i): … … 223 245 def get_prediction_error_list(res): 224 246 return [result.actualClass - result.classes[0] for result in res.results] 247 248 def get_description_list(res, i): 249 return [result.probabilities[0].reliability_estimate[i].text_description for result in res.results] 225 250 226 251 def get_pearson_r(res): … … 239 264 else: 240 265 r, p = statc.pearsonr([abs(pe) for pe in prediction_error], reliability_estimate) 266 except Exception: 267 r = p = float("NaN") 268 results.append((r, p, signed_or_absolute, method)) 269 return results 270 271 def get_spearman_r(res): 272 """ 273 Returns Spearmans coefficient between the prediction error and each of the 274 used reliability estimates. Function also return the p-value of each of 275 the coefficients. 
276 """ 277 prediction_error = get_prediction_error_list(res) 278 results = [] 279 for i in xrange(len(res.results[0].probabilities[0].reliability_estimate)): 280 reliability_estimate, signed_or_absolute, method = get_reliability_estimation_list(res, i) 281 try: 282 if signed_or_absolute == SIGNED: 283 r, p = statc.spearmanr(prediction_error, reliability_estimate) 284 else: 285 r, p = statc.spearmanr([abs(pe) for pe in prediction_error], reliability_estimate) 241 286 except Exception: 242 287 r = p = float("NaN") … … 294 339 self.icv_method = icv_method 295 340 self.icv_method_name = METHOD_NAME[icv_method] if icv_method != -1 else "" 296 341 self.text_description = None 342 343 class DescriptiveAnalysis: 344 def __init__(self, estimator, desc=["high", "medium", "low"], procentage=[0.00, 0.33, 0.66]): 345 self.desc = desc 346 self.procentage = procentage 347 self.estimator = estimator 348 349 def __call__(self, examples, weight=None, **kwds): 350 351 # Calculate borders using cross validation 352 res = Orange.evaluation.testing.cross_validation([self.estimator], examples) 353 all_borders = [] 354 for i in xrange(len(res.results[0].probabilities[0].reliability_estimate)): 355 estimates, signed_or_absolute, method = get_reliability_estimation_list(res, i) 356 sorted_estimates = sorted( abs(x) for x in estimates) 357 borders = [sorted_estimates[int(len(estimates)*p)-1] for p in self.procentage] 358 all_borders.append(borders) 359 360 # Learn on whole train data 361 estimator_classifier = self.estimator(examples) 362 363 return DescriptiveAnalysisClassifier(estimator_classifier, all_borders, self.desc) 364 365 class DescriptiveAnalysisClassifier: 366 def __init__(self, estimator_classifier, all_borders, desc): 367 self.estimator_classifier = estimator_classifier 368 self.all_borders = all_borders 369 self.desc = desc 370 371 def __call__(self, example, result_type=Orange.core.GetValue): 372 predicted, probabilities = self.estimator_classifier(example, 
Orange.core.GetBoth) 373 374 for borders, estimate in zip(self.all_borders, probabilities.reliability_estimate): 375 estimate.text_description = self.desc[0] 376 for lower_border, text_desc in zip(borders, self.desc): 377 if estimate.estimate >= lower_border: 378 estimate.text_description = text_desc 379 380 # Return the appropriate type of result 381 if result_type == Orange.core.GetValue: 382 return predicted 383 elif result_type == Orange.core.GetProbabilities: 384 return probabilities 385 else: 386 return predicted, probabilities 387 297 388 class SensitivityAnalysis: 298 389 """ … … 330 421 331 422 class SensitivityAnalysisClassifier: 332 def __init__(self, e, examples, m ax_value, min_value, learner):423 def __init__(self, e, examples, min_value, max_value, learner): 333 424 self.e = e 334 425 self.examples = examples … … 402 493 # Create bagged classifiers using sampling with replacement 403 494 for _ in xrange(self.m): 404 selection = [random.randrange(len(examples)) for _ in xrange(len(examples))]405 data = examples. 
getitems(selection)495 selection = select_with_repeat(len(examples)) 496 data = examples.select(selection) 406 497 classifiers.append(learner(data)) 407 498 return BaggingVarianceClassifier(classifiers) … … 448 539 449 540 """ 450 def __init__(self, k= 5):541 def __init__(self, k=0): 451 542 self.k = k 452 543 … … 457 548 distance_id = Orange.core.newmetaid() 458 549 nearest_neighbours = nearest_neighbours_constructor(examples, 0, distance_id) 550 551 if self.k == 0: 552 self.k = max(5, len(examples)/20) 459 553 460 554 return LocalCrossValidationClassifier(distance_id, nearest_neighbours, self.k, learner) … … 619 713 bvck_estimates.extend(cnk_estimates) 620 714 return bvck_estimates 715 716 class ErrorPredicting: 717 def __init__(self): 718 pass 719 720 def __call__(self, examples, learner): 721 res = Orange.evaluation.testing.cross_validation([learner], examples) 722 prediction_errors = get_prediction_error_list(res) 723 724 new_domain = Orange.data.Domain(examples.domain.attributes, Orange.core.FloatVariable("pe")) 725 new_dataset = Orange.data.Table(new_domain, examples) 726 727 for example, prediction_error in izip(new_dataset, prediction_errors): 728 example.set_class(prediction_error) 729 730 rf = Orange.ensemble.forest.RandomForestLearner() 731 rf_classifier = rf(new_dataset) 732 733 return ErrorPredictingClassification(rf_classifier, new_domain) 734 735 class ErrorPredictingClassification: 736 def __init__(self, rf_classifier, new_domain): 737 self.rf_classifier = rf_classifier 738 self.new_domain = new_domain 739 740 def __call__(self, example, predicted, probabilities): 741 new_example = Orange.data.Instance(self.new_domain, example) 742 value = self.rf_classifier(new_example, Orange.core.GetValue) 743 744 return [Estimate(value.value, SIGNED, SABIAS_SIGNED)] 621 745 622 746 class Learner: … … 632 756 (estimate, signed_or_absolute, method). 
633 757 634 :param e: List of possible e value for SAvar and SAbias reliability estimate 635 :type e: list of floats 636 637 :param m: Number of bagged models to be used with BAGV estimate 638 :type m: int 639 640 :param cnk_k: Number of nearest neighbours used in CNK estimate 641 :type cnk_k: int 642 643 :param lcv_k: Number of nearest neighbours used in LCV estimate 644 :type cnk_k: int 645 646 :param icv: Use internal crossvalidation. Internal crossvalidation calculates all 647 the reliability estimates on the training data using crossvalidation. 648 Then it chooses the most successful estimate and uses it on the test 649 dataset. 650 :type icv: boolean 651 652 :param use: List of booleans saying which reliability methods should be 653 used in our experiment and which not. 654 :type use: list of booleans 655 656 :param use_with_icv: List of booleans saying which reliability methods 657 should be used in inside cross validation and 658 which not. 659 660 :type use_with_icv: list of booleans 758 :param box_learner: Learner we want to wrap into reliability estimation 759 :type box_learner: learner 760 761 :param estimators: List of different reliability estimation methods we 762 want to use on the chosen learner. 
763 :type estimators: list of reliability estimators 764 765 :param name: Name of this reliability learner 766 :type name: string 661 767 662 768 :rtype: :class:`Orange.evaluation.reliability.Learner` … … 664 770 def __init__(self, box_learner, name="Reliability estimation", 665 771 estimators = [SensitivityAnalysis(), 666 BaggingVariance(),667 772 LocalCrossValidation(), 668 CNeighbours(), 669 Mahalanobis()], **kwds): 773 BaggingVarianceCNeighbours(), 774 Mahalanobis(), 775 ], 776 blending = False, **kwds): 670 777 self.__dict__.update(kwds) 671 778 self.name = name 672 779 self.estimators = estimators 673 780 self.box_learner = box_learner 781 self.blending = blending 674 782 675 783 … … 683 791 :rtype: :class:`Orange.evaluation.reliability.Classifier` 684 792 """ 685 return Classifier(examples, self.box_learner, self.estimators) 793 794 blending_classifier = None 795 new_domain = None 796 797 # Perform blending of the reliability estimates 798 if self.blending: 799 # Do the internal cross validation to get the estimates on training set 800 self.blending = False 801 res = Orange.evaluation.testing.cross_validation([self], examples) 802 self.blending = True 803 804 # Create new domain 805 new_domain = Orange.data.Domain([Orange.core.FloatVariable(estimate.method_name) for estimate in res.results[0].probabilities[0].reliability_estimate], Orange.core.FloatVariable("pe")) 806 807 # Create dataset with this domain 808 new_dataset = Orange.data.Table(new_domain) 809 810 for result in res.results: 811 values = [estimate.estimate for estimate in result.probabilities[0].reliability_estimate] + [abs(result.actualClass - result.classes[0])] 812 new_example = Orange.data.Instance(new_domain, values) 813 new_dataset.append(new_example) 814 815 # Learn some learner on new dataset 816 #blender = Orange.classification.svm.SVMLearner() 817 #blender.svm_type = blender.Nu_SVR 818 blender = Orange.regression.linear.LinearRegressionLearner() 819 820 blending_classifier = 
blender(new_dataset) 821 822 print get_pearson_r(res) 823 print blending_classifier 824 825 return Classifier(examples, self.box_learner, self.estimators, self.blending, new_domain, blending_classifier) 686 826 687 827 def internal_cross_validation(self, examples, folds=10): … … 698 838 for fold in xrange(folds): 699 839 data = examples.select(cv_indices, fold) 700 res = Orange.evaluation.testing.cross Validation([self], data)840 res = Orange.evaluation.testing.cross_validation([self], data) 701 841 results = get_pearson_r(res) 702 842 for r, _, _, method in results: 703 843 sum_of_rs[method] += r 704 844 sorted_sum_of_rs = sorted(sum_of_rs.items(), key=lambda estimate: estimate[1], reverse=True) 845 print sorted_sum_of_rs 705 846 return sorted_sum_of_rs[0][0] 706 847 … … 708 849 709 850 class Classifier: 710 def __init__(self, examples, box_learner, estimators, **kwds):851 def __init__(self, examples, box_learner, estimators, blending, blending_domain, rf_classifier, **kwds): 711 852 self.__dict__.update(kwds) 712 853 self.examples = examples 713 854 self.box_learner = box_learner 714 855 self.estimators = estimators 856 self.blending = blending 857 self.blending_domain = blending_domain 858 self.rf_classifier = rf_classifier 715 859 716 860 # Train the learner with original data … … 748 892 probabilities.reliability_estimate.extend(estimate(example, predicted, probabilities)) 749 893 894 # Do the blending part 895 if self.blending: 896 # Create an example 897 values = [estimate.estimate for estimate in probabilities.reliability_estimate] + ["?"] 898 new_example = Orange.data.Instance(self.blending_domain, values) 899 blending_value = self.rf_classifier(new_example, Orange.core.GetValue) 900 probabilities.reliability_estimate.append(Estimate(blending_value.value, ABSOLUTE, BLENDING_ABSOLUTE)) 901 750 902 # Return the appropriate type of result 751 903 if result_type == Orange.core.GetValue:
Note: See TracChangeset
for help on using the changeset viewer.