Changeset 8902:b42747bac52a in orange


Ignore:
Timestamp:
09/05/11 11:07:34 (3 years ago)
Author:
mocnik <mocnik@…>
Branch:
default
Convert:
bf833b41cb965d050fdb1b1b60c2d1f1d0e49d1a
Message:

Updating the documentation examples and keeping it actual.

Location:
orange
Files:
1 added
1 edited

Legend:

Unmodified
Added
Removed
  • orange/Orange/evaluation/reliability.py

    r8059 r8902  
    1818regression predictions, Zoran Bosnic 2008. 
    1919 
    20 The following example shows a basic usage of reliability estimates 
     20The next example shows basic usage of reliability estimation 
     21(`reliability_basic.py`_, uses `housing.tab`_): 
     22 
     23.. literalinclude:: code/reliability_basic.py 
     24 
     25First we load the desired data table and choose the learner we want to use 
     26reliability estimation on. We want to calculate only the Mahalanobis and 
     27local cross-validation estimates, with the desired parameters. We train the 
     28estimator on the data and estimate the reliability for the first instance of the data table. 
     29Finally, we output the names of the estimates used and their values. 
     30 
     31We can also do reliability estimation on a whole data table, not only on a single 
     32instance. The next example performs cross-validation on the desired data table, 
     33using the default reliability estimates, and at the end outputs the reliability 
     34estimates for the first instance of the data table. 
    2135(`reliability-run.py`_, uses `housing.tab`_): 
    2236 
    2337.. literalinclude:: code/reliability-run.py 
    2438 
    25 Reliability estimation methods are computationaly quite hard so it may take 
     39Reliability estimation methods are computationally quite hard so it may take 
    2640a bit of time for this script to produce a result. In the above example we 
    2741first create a learner that we're interested in, in this example 
    28 k-nearest-neighbours, and use it inside reliability learner and do cross 
     42k-nearest-neighbors, and use it inside reliability learner and do cross 
    2943validation to get the results. Now we output for the first example in the 
    30 dataset all the reliability estimates and their names. 
     44data table all the reliability estimates and their names. 
    3145 
    3246Reliability Methods 
     
    140154 
    141155.. literalinclude:: code/reliability-long.py 
    142     :lines: 30-43 
     156    :lines: 30-42 
    143157 
    144158In this part of the example we have a usual prediction problem, we have a  
     
    176190<http://journals.cambridge.org/abstract_S0269888909990154>`_ 
    177191*The Knowledge Engineering Review* 25(1), 27-47. 
    178  
    179192""" 
    180193import Orange 
     
    184197import math 
    185198import warnings 
     199 
     200from collections import defaultdict 
     201from itertools import izip 
     202 
     203import Orange.regression.linear 
    186204 
    187205# Labels and final variables 
     
    206224BVCK_ABSOLUTE = 7 
    207225MAHAL_ABSOLUTE = 8 
     226BLENDING_ABSOLUTE = 9 
    208227ICV_METHOD = 10 
    209228 
     
    216235               3: "BAGV absolute", 4: "CNK signed", 5: "CNK absolute", 
    217236               6: "LCV absolute", 7: "BVCK_absolute", 8: "Mahalanobis absolute", 
    218                10: "ICV"} 
     237               9: "BLENDING absolute", 10: "ICV", 11: "RF Variance", 12: "RF Std"} 
     238 
     239select_with_repeat = Orange.core.MakeRandomIndicesMultiple() 
     240select_with_repeat.random_generator = Orange.core.RandomGenerator() 
    219241 
    220242def get_reliability_estimation_list(res, i): 
     
    223245def get_prediction_error_list(res): 
    224246    return [result.actualClass - result.classes[0] for result in res.results] 
     247 
     248def get_description_list(res, i): 
     249    return [result.probabilities[0].reliability_estimate[i].text_description for result in res.results] 
    225250 
    226251def get_pearson_r(res): 
     
    239264            else: 
    240265                r, p = statc.pearsonr([abs(pe) for pe in prediction_error], reliability_estimate) 
     266        except Exception: 
     267            r = p = float("NaN") 
     268        results.append((r, p, signed_or_absolute, method)) 
     269    return results 
     270 
     271def get_spearman_r(res): 
     272    """ 
     273    Returns Spearmans coefficient between the prediction error and each of the 
     274    used reliability estimates. Function also return the p-value of each of 
     275    the coefficients. 
     276    """ 
     277    prediction_error = get_prediction_error_list(res) 
     278    results = [] 
     279    for i in xrange(len(res.results[0].probabilities[0].reliability_estimate)): 
     280        reliability_estimate, signed_or_absolute, method = get_reliability_estimation_list(res, i) 
     281        try: 
     282            if signed_or_absolute == SIGNED: 
     283                r, p = statc.spearmanr(prediction_error, reliability_estimate) 
     284            else: 
     285                r, p = statc.spearmanr([abs(pe) for pe in prediction_error], reliability_estimate) 
    241286        except Exception: 
    242287            r = p = float("NaN") 
     
    294339        self.icv_method = icv_method 
    295340        self.icv_method_name = METHOD_NAME[icv_method] if icv_method != -1 else "" 
    296          
     341        self.text_description = None 
     342 
     343class DescriptiveAnalysis: 
     344    def __init__(self, estimator, desc=["high", "medium", "low"], procentage=[0.00, 0.33, 0.66]): 
     345        self.desc = desc 
     346        self.procentage = procentage 
     347        self.estimator = estimator 
     348     
     349    def __call__(self, examples, weight=None, **kwds): 
     350         
     351        # Calculate borders using cross validation 
     352        res = Orange.evaluation.testing.cross_validation([self.estimator], examples) 
     353        all_borders = [] 
     354        for i in xrange(len(res.results[0].probabilities[0].reliability_estimate)): 
     355            estimates, signed_or_absolute, method = get_reliability_estimation_list(res, i) 
     356            sorted_estimates = sorted( abs(x) for x in estimates) 
     357            borders = [sorted_estimates[int(len(estimates)*p)-1]  for p in self.procentage] 
     358            all_borders.append(borders) 
     359         
     360        # Learn on whole train data 
     361        estimator_classifier = self.estimator(examples) 
     362         
     363        return DescriptiveAnalysisClassifier(estimator_classifier, all_borders, self.desc) 
     364 
     365class DescriptiveAnalysisClassifier: 
     366    def __init__(self, estimator_classifier, all_borders, desc): 
     367        self.estimator_classifier = estimator_classifier 
     368        self.all_borders = all_borders 
     369        self.desc = desc 
     370     
     371    def __call__(self, example, result_type=Orange.core.GetValue): 
     372        predicted, probabilities = self.estimator_classifier(example, Orange.core.GetBoth) 
     373         
     374        for borders, estimate in zip(self.all_borders, probabilities.reliability_estimate): 
     375            estimate.text_description = self.desc[0] 
     376            for lower_border, text_desc in zip(borders, self.desc): 
     377                if estimate.estimate >= lower_border: 
     378                    estimate.text_description = text_desc 
     379         
     380        # Return the appropriate type of result 
     381        if result_type == Orange.core.GetValue: 
     382            return predicted 
     383        elif result_type == Orange.core.GetProbabilities: 
     384            return probabilities 
     385        else: 
     386            return predicted, probabilities 
     387 
    297388class SensitivityAnalysis: 
    298389    """ 
     
    330421     
    331422class SensitivityAnalysisClassifier: 
    332     def __init__(self, e, examples, max_value, min_value, learner): 
     423    def __init__(self, e, examples, min_value, max_value, learner): 
    333424        self.e = e 
    334425        self.examples = examples 
     
    402493        # Create bagged classifiers using sampling with replacement 
    403494        for _ in xrange(self.m): 
    404             selection = [random.randrange(len(examples)) for _ in xrange(len(examples))] 
    405             data = examples.getitems(selection) 
     495            selection = select_with_repeat(len(examples)) 
     496            data = examples.select(selection) 
    406497            classifiers.append(learner(data)) 
    407498        return BaggingVarianceClassifier(classifiers) 
     
    448539     
    449540    """ 
    450     def __init__(self, k=5): 
     541    def __init__(self, k=0): 
    451542        self.k = k 
    452543     
     
    457548        distance_id = Orange.core.newmetaid() 
    458549        nearest_neighbours = nearest_neighbours_constructor(examples, 0, distance_id) 
     550         
     551        if self.k == 0: 
     552            self.k = max(5, len(examples)/20) 
    459553         
    460554        return LocalCrossValidationClassifier(distance_id, nearest_neighbours, self.k, learner) 
     
    619713        bvck_estimates.extend(cnk_estimates) 
    620714        return bvck_estimates 
     715 
     716class ErrorPredicting: 
     717    def __init__(self): 
     718        pass 
     719     
     720    def __call__(self, examples, learner): 
     721        res = Orange.evaluation.testing.cross_validation([learner], examples) 
     722        prediction_errors = get_prediction_error_list(res) 
     723         
     724        new_domain = Orange.data.Domain(examples.domain.attributes, Orange.core.FloatVariable("pe")) 
     725        new_dataset = Orange.data.Table(new_domain, examples) 
     726         
     727        for example, prediction_error in izip(new_dataset, prediction_errors): 
     728            example.set_class(prediction_error) 
     729         
     730        rf = Orange.ensemble.forest.RandomForestLearner() 
     731        rf_classifier = rf(new_dataset) 
     732         
     733        return ErrorPredictingClassification(rf_classifier, new_domain) 
     734         
     735class ErrorPredictingClassification: 
     736    def __init__(self, rf_classifier, new_domain): 
     737        self.rf_classifier = rf_classifier 
     738        self.new_domain = new_domain 
     739     
     740    def __call__(self, example, predicted, probabilities): 
     741        new_example = Orange.data.Instance(self.new_domain, example) 
     742        value = self.rf_classifier(new_example, Orange.core.GetValue) 
     743         
     744        return [Estimate(value.value, SIGNED, SABIAS_SIGNED)] 
    621745 
    622746class Learner: 
     
    632756    (estimate, signed_or_absolute, method). 
    633757     
    634     :param e: List of possible e value for SAvar and SAbias reliability estimate 
    635     :type e: list of floats 
    636      
    637     :param m: Number of bagged models to be used with BAGV estimate 
    638     :type m: int 
    639      
    640     :param cnk_k: Number of nearest neighbours used in CNK estimate 
    641     :type cnk_k: int 
    642      
    643     :param lcv_k: Number of nearest neighbours used in LCV estimate 
    644     :type cnk_k: int 
    645      
    646     :param icv: Use internal cross-validation. Internal cross-validation calculates all 
    647                 the reliability estimates on the training data using cross-validation. 
    648                 Then it chooses the most successful estimate and uses it on the test 
    649                 dataset. 
    650     :type icv: boolean 
    651      
    652     :param use: List of booleans saying which reliability methods should be 
    653                 used in our experiment and which not. 
    654     :type use: list of booleans 
    655      
    656     :param use_with_icv: List of booleans saying which reliability methods 
    657                          should be used in inside cross validation and 
    658                          which not. 
    659      
    660     :type use_with_icv: list of booleans 
     758    :param box_learner: Learner we want to wrap into reliability estimation 
     759    :type box_learner: learner 
     760     
     761    :param estimators: List of different reliability estimation methods we 
     762                       want to use on the chosen learner. 
     763    :type estimators: list of reliability estimators 
     764     
     765    :param name: Name of this reliability learner 
     766    :type name: string 
    661767     
    662768    :rtype: :class:`Orange.evaluation.reliability.Learner` 
     
    664770    def __init__(self, box_learner, name="Reliability estimation", 
    665771                 estimators = [SensitivityAnalysis(), 
    666                                BaggingVariance(), 
    667772                               LocalCrossValidation(), 
    668                                CNeighbours(), 
    669                                Mahalanobis()], **kwds): 
     773                               BaggingVarianceCNeighbours(), 
     774                               Mahalanobis(), 
     775                               ], 
     776                 blending = False, **kwds): 
    670777        self.__dict__.update(kwds) 
    671778        self.name = name 
    672779        self.estimators = estimators 
    673780        self.box_learner = box_learner 
     781        self.blending = blending 
    674782         
    675783     
     
    683791        :rtype: :class:`Orange.evaluation.reliability.Classifier` 
    684792        """ 
    685         return Classifier(examples, self.box_learner, self.estimators) 
     793         
     794        blending_classifier = None 
     795        new_domain = None 
     796         
     797        # Perform blending of the reliability estimates 
     798        if self.blending: 
     799            # Do the internal cross validation to get the estimates on training set 
     800            self.blending = False 
     801            res = Orange.evaluation.testing.cross_validation([self], examples) 
     802            self.blending = True 
     803             
     804            # Create new domain 
     805            new_domain = Orange.data.Domain([Orange.core.FloatVariable(estimate.method_name) for estimate in res.results[0].probabilities[0].reliability_estimate], Orange.core.FloatVariable("pe")) 
     806             
     807            # Create dataset with this domain 
     808            new_dataset = Orange.data.Table(new_domain) 
     809             
     810            for result in res.results: 
     811                values = [estimate.estimate for estimate in result.probabilities[0].reliability_estimate] + [abs(result.actualClass - result.classes[0])] 
     812                new_example = Orange.data.Instance(new_domain, values) 
     813                new_dataset.append(new_example) 
     814             
     815            # Learn some learner on new dataset 
     816            #blender = Orange.classification.svm.SVMLearner() 
     817            #blender.svm_type = blender.Nu_SVR 
     818            blender = Orange.regression.linear.LinearRegressionLearner() 
     819             
     820            blending_classifier = blender(new_dataset) 
     821             
     822            print get_pearson_r(res) 
     823            print blending_classifier 
     824         
     825        return Classifier(examples, self.box_learner, self.estimators, self.blending, new_domain, blending_classifier) 
    686826     
    687827    def internal_cross_validation(self, examples, folds=10): 
     
    698838        for fold in xrange(folds): 
    699839            data = examples.select(cv_indices, fold) 
    700             res = Orange.evaluation.testing.crossValidation([self], data) 
     840            res = Orange.evaluation.testing.cross_validation([self], data) 
    701841            results = get_pearson_r(res) 
    702842            for r, _, _, method in results: 
    703843                sum_of_rs[method] += r 
    704844        sorted_sum_of_rs = sorted(sum_of_rs.items(), key=lambda estimate: estimate[1], reverse=True) 
     845        print sorted_sum_of_rs 
    705846        return sorted_sum_of_rs[0][0] 
    706847     
     
    708849 
    709850class Classifier: 
    710     def __init__(self, examples, box_learner, estimators, **kwds): 
     851    def __init__(self, examples, box_learner, estimators, blending, blending_domain, rf_classifier, **kwds): 
    711852        self.__dict__.update(kwds) 
    712853        self.examples = examples 
    713854        self.box_learner = box_learner 
    714855        self.estimators = estimators 
     856        self.blending = blending 
     857        self.blending_domain = blending_domain 
     858        self.rf_classifier = rf_classifier 
    715859         
    716860        # Train the learner with original data 
     
    748892            probabilities.reliability_estimate.extend(estimate(example, predicted, probabilities)) 
    749893         
     894        # Do the blending part 
     895        if self.blending: 
     896            # Create an example 
     897            values = [estimate.estimate for estimate in probabilities.reliability_estimate] + ["?"] 
     898            new_example = Orange.data.Instance(self.blending_domain, values) 
     899            blending_value = self.rf_classifier(new_example, Orange.core.GetValue) 
     900            probabilities.reliability_estimate.append(Estimate(blending_value.value, ABSOLUTE, BLENDING_ABSOLUTE)) 
     901             
    750902        # Return the appropriate type of result 
    751903        if result_type == Orange.core.GetValue: 
Note: See TracChangeset for help on using the changeset viewer.