Changeset 9682:3c3286e36602 in orange


Ignore:
Timestamp:
02/06/12 10:46:39 (2 years ago)
Author:
Matija Polajnar <matija.polajnar@…>
Branch:
default
Message:

Reliability: refactor example->instance.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • orange/Orange/evaluation/reliability.py

    r9680 r9682  
    119119    results_by_fold = Orange.evaluation.scoring.split_by_iterations(res) 
    120120    number_of_estimates = len(res.results[0].probabilities[0].reliability_estimate) 
    121     number_of_examples = len(res.results) 
     121    number_of_instances = len(res.results) 
    122122    number_of_folds = len(results_by_fold) 
    123123    results = [0 for _ in xrange(number_of_estimates)] 
     
    142142    # Calculate p-values 
    143143    results = [float(res) / number_of_folds for res in results] 
    144     ps = [p_value_from_r(r, number_of_examples) for r in results] 
     144    ps = [p_value_from_r(r, number_of_instances) for r in results] 
    145145     
    146146    return zip(results, ps, sig, method_list) 
     
    203203        self.estimator = estimator 
    204204     
    205     def __call__(self, examples, weight=None, **kwds): 
     205    def __call__(self, instances, weight=None, **kwds): 
    206206         
    207207        # Calculate borders using cross validation 
    208         res = Orange.evaluation.testing.cross_validation([self.estimator], examples) 
     208        res = Orange.evaluation.testing.cross_validation([self.estimator], instances) 
    209209        all_borders = [] 
    210210        for i in xrange(len(res.results[0].probabilities[0].reliability_estimate)): 
     
    215215         
    216216        # Learn on whole train data 
    217         estimator_classifier = self.estimator(examples) 
     217        estimator_classifier = self.estimator(instances) 
    218218         
    219219        return DescriptiveAnalysisClassifier(estimator_classifier, all_borders, self.desc) 
     
    225225        self.desc = desc 
    226226     
    227     def __call__(self, example, result_type=Orange.core.GetValue): 
    228         predicted, probabilities = self.estimator_classifier(example, Orange.core.GetBoth) 
     227    def __call__(self, instance, result_type=Orange.core.GetValue): 
     228        predicted, probabilities = self.estimator_classifier(instance, Orange.core.GetBoth) 
    229229         
    230230        for borders, estimate in zip(self.all_borders, probabilities.reliability_estimate): 
     
    251251    :rtype: :class:`Orange.evaluation.reliability.SensitivityAnalysisClassifier` 
    252252     
    253     To estimate the reliabilty for given instance, the learning set is extended 
    254     with this instance, labeled with :math:`K + \epsilon (l_{max} - l_{min})`, 
     253    To estimate the reliability of the prediction for a given instance, 
     254    the learning set is extended with this instance, labeled with 
     255    :math:`K + \epsilon (l_{max} - l_{min})`, 
    255256    where :math:`K` denotes the initial prediction, 
    256257    :math:`\epsilon` is sensitivity parameter and :math:`l_{min}` and 
    257     :math:`l_{max}` denote lower and the upper bound of the learning examples 
    258     . After computing different sensitivity predictions using different 
    259     values of :math:`\epsilon`, the prediction are combined into SAvar and 
    260     SAbias. SAbias can be used as signed estimate or as absolute value of 
    261     SAbias. 
     258    :math:`l_{max}` denote the lower and upper bounds of the learning 
     259    instances' labels. After computing different sensitivity predictions 
     260    using different values of :math:`\epsilon`, the predictions are combined 
     261    into SAvar and SAbias. SAbias can be used in a signed or absolute form. 
    262262 
    263263    :math:`SAvar = \\frac{\sum_{\epsilon \in E}(K_{\epsilon} - K_{-\epsilon})}{|E|}` 
     
    270270        self.e = e 
    271271     
    272     def __call__(self, examples, learner): 
    273         min_value = max_value = examples[0].getclass().value 
    274         for ex in examples: 
     272    def __call__(self, instances, learner): 
     273        min_value = max_value = instances[0].getclass().value 
     274        for ex in instances: 
    275275            if ex.getclass().value > max_value: 
    276276                max_value = ex.getclass().value 
    277277            if ex.getclass().value < min_value: 
    278278                min_value = ex.getclass().value 
    279         return SensitivityAnalysisClassifier(self.e, examples, min_value, max_value, learner) 
     279        return SensitivityAnalysisClassifier(self.e, instances, min_value, max_value, learner) 
    280280     
    281281class SensitivityAnalysisClassifier: 
    282     def __init__(self, e, examples, min_value, max_value, learner): 
     282    def __init__(self, e, instances, min_value, max_value, learner): 
    283283        self.e = e 
    284         self.examples = examples 
     284        self.instances = instances 
    285285        self.max_value = max_value 
    286286        self.min_value = min_value 
    287287        self.learner = learner 
    288288     
    289     def __call__(self, example, predicted, probabilities): 
     289    def __call__(self, instance, predicted, probabilities): 
    290290        # Create new dataset 
    291         r_data = Orange.data.Table(self.examples) 
    292          
    293         # Create new example 
    294         modified_example = Orange.data.Instance(example) 
     291        r_data = Orange.data.Table(self.instances) 
     292         
     293        # Create new instance 
     294        modified_instance = Orange.data.Instance(instance) 
    295295         
    296296        # Append it to the data 
    297         r_data.append(modified_example) 
     297        r_data.append(modified_instance) 
    298298         
    299299        # Calculate SAvar & SAbias 
     
    304304            r_data[-1].setclass(predicted.value + eps*(self.max_value - self.min_value)) 
    305305            c = self.learner(r_data) 
    306             k_plus = c(example, Orange.core.GetValue) 
     306            k_plus = c(instance, Orange.core.GetValue) 
    307307             
    308308            # -epsilon 
    309309            r_data[-1].setclass(predicted.value - eps*(self.max_value - self.min_value)) 
    310310            c = self.learner(r_data) 
    311             k_minus = c(example, Orange.core.GetValue) 
     311            k_minus = c(instance, Orange.core.GetValue) 
    312312            #print len(r_data) 
    313313            #print eps*(self.max_value - self.min_value) 
     
    346346        self.m = m 
    347347     
    348     def __call__(self, examples, learner): 
     348    def __call__(self, instances, learner): 
    349349        classifiers = [] 
    350350         
    351351        # Create bagged classifiers using sampling with replacement 
    352352        for _ in xrange(self.m): 
    353             selection = select_with_repeat(len(examples)) 
    354             data = examples.select(selection) 
     353            selection = select_with_repeat(len(instances)) 
     354            data = instances.select(selection) 
    355355            classifiers.append(learner(data)) 
    356356        return BaggingVarianceClassifier(classifiers) 
     
    360360        self.classifiers = classifiers 
    361361     
    362     def __call__(self, example, *args): 
     362    def __call__(self, instance, *args): 
    363363        BAGV = 0 
    364364         
    365365        # Calculate the bagging variance 
    366         bagged_values = [c(example, Orange.core.GetValue).value for c in self.classifiers if c is not None] 
     366        bagged_values = [c(instance, Orange.core.GetValue).value for c in self.classifiers if c is not None] 
    367367         
    368368        k = sum(bagged_values) / len(bagged_values) 
     
    417417        self.learner = learner 
    418418     
    419     def __call__(self, example, *args): 
     419    def __call__(self, instance, *args): 
    420420        LCVer = 0 
    421421        LCVdi = 0 
     
    423423        # Find k nearest neighbors 
    424424         
    425         knn = [ex for ex in self.nearest_neighbours(example, self.k)] 
     425        knn = [ex for ex in self.nearest_neighbours(instance, self.k)] 
    426426         
    427427        # leave one out of prediction error 
     
    465465        self.k = k 
    466466     
    467     def __call__(self, examples, learner): 
     467    def __call__(self, instances, learner): 
    468468        nearest_neighbours_constructor = Orange.classification.knn.FindNearestConstructor() 
    469469        nearest_neighbours_constructor.distanceConstructor = Orange.distance.EuclideanConstructor() 
    470470         
    471471        distance_id = Orange.data.new_meta_id() 
    472         nearest_neighbours = nearest_neighbours_constructor(examples, 0, distance_id) 
     472        nearest_neighbours = nearest_neighbours_constructor(instances, 0, distance_id) 
    473473        return CNeighboursClassifier(nearest_neighbours, self.k) 
    474474 
     
    478478        self.k = k 
    479479     
    480     def __call__(self, example, predicted, probabilities): 
     480    def __call__(self, instance, predicted, probabilities): 
    481481        CNK = 0 
    482482         
    483483        # Find k nearest neighbors 
    484484         
    485         knn = [ex for ex in self.nearest_neighbours(example, self.k)] 
     485        knn = [ex for ex in self.nearest_neighbours(instance, self.k)] 
    486486         
    487487        # average label of neighbors 
     
    512512        self.k = k 
    513513     
    514     def __call__(self, examples, *args): 
     514    def __call__(self, instances, *args): 
    515515        nnm = Orange.classification.knn.FindNearestConstructor() 
    516516        nnm.distanceConstructor = Orange.distance.MahalanobisConstructor() 
    517517         
    518518        mid = Orange.data.new_meta_id() 
    519         nnm = nnm(examples, 0, mid) 
     519        nnm = nnm(instances, 0, mid) 
    520520        return MahalanobisClassifier(self.k, nnm, mid) 
    521521 
     
    526526        self.mid = mid 
    527527     
    528     def __call__(self, example, *args): 
     528    def __call__(self, instance, *args): 
    529529        mahalanobis_distance = 0 
    530530         
    531         mahalanobis_distance = sum(ex[self.mid].value for ex in self.nnm(example, self.k)) 
     531        mahalanobis_distance = sum(ex[self.mid].value for ex in self.nnm(instance, self.k)) 
    532532         
    533533        return [ Estimate(mahalanobis_distance, ABSOLUTE, MAHAL_ABSOLUTE) ] 
     
    546546        pass 
    547547     
    548     def __call__(self, examples, *args): 
     548    def __call__(self, instances, *args): 
    549549        dc = Orange.core.DomainContinuizer() 
    550550        dc.classTreatment = Orange.core.DomainContinuizer.Ignore 
     
    552552        dc.multinomialTreatment = Orange.core.DomainContinuizer.NValues 
    553553         
    554         new_domain = dc(examples) 
    555         new_examples = examples.translate(new_domain) 
    556          
    557         X, _, _ = new_examples.to_numpy() 
    558         example_avg = numpy.average(X, 0) 
     554        new_domain = dc(instances) 
     555        new_instances = instances.translate(new_domain) 
     556         
     557        X, _, _ = new_instances.to_numpy() 
     558        instance_avg = numpy.average(X, 0) 
    559559         
    560560        distance_constructor = Orange.distance.MahalanobisConstructor() 
    561         distance = distance_constructor(new_examples) 
    562          
    563         average_example = Orange.data.Instance(new_examples.domain, list(example_avg) + ["?"]) 
    564          
    565         return MahalanobisToCenterClassifier(distance, average_example, new_domain) 
     561        distance = distance_constructor(new_instances) 
     562         
     563        average_instance = Orange.data.Instance(new_instances.domain, list(instance_avg) + ["?"]) 
     564         
     565        return MahalanobisToCenterClassifier(distance, average_instance, new_domain) 
    566566 
    567567class MahalanobisToCenterClassifier: 
    568     def __init__(self, distance, average_example, new_domain): 
     568    def __init__(self, distance, average_instance, new_domain): 
    569569        self.distance = distance 
    570         self.average_example = average_example 
     570        self.average_instance = average_instance 
    571571        self.new_domain = new_domain 
    572572     
    573     def __call__(self, example, *args): 
    574          
    575         ex = Orange.data.Instance(self.new_domain, example) 
    576          
    577         mahalanobis_to_center = self.distance(ex, self.average_example) 
     573    def __call__(self, instance, *args): 
     574         
     575        inst = Orange.data.Instance(self.new_domain, instance) 
     576         
     577        mahalanobis_to_center = self.distance(inst, self.average_instance) 
    578578         
    579579        return [ Estimate(mahalanobis_to_center, ABSOLUTE, MAHAL_TO_CENTER_ABSOLUTE) ] 
     
    599599        self.cnk = cnk 
    600600     
    601     def __call__(self, examples, learner): 
    602         bagv_classifier = self.bagv(examples, learner) 
    603         cnk_classifier = self.cnk(examples, learner) 
     601    def __call__(self, instances, learner): 
     602        bagv_classifier = self.bagv(instances, learner) 
     603        cnk_classifier = self.cnk(instances, learner) 
    604604        return BaggingVarianceCNeighboursClassifier(bagv_classifier, cnk_classifier) 
    605605 
     
    609609        self.cnk_classifier = cnk_classifier 
    610610     
    611     def __call__(self, example, predicted, probabilities): 
    612         bagv_estimates = self.bagv_classifier(example, predicted, probabilities) 
    613         cnk_estimates = self.cnk_classifier(example, predicted, probabilities) 
     611    def __call__(self, instance, predicted, probabilities): 
     612        bagv_estimates = self.bagv_classifier(instance, predicted, probabilities) 
     613        cnk_estimates = self.cnk_classifier(instance, predicted, probabilities) 
    614614         
    615615        bvck_value = (bagv_estimates[0].estimate + cnk_estimates[1].estimate)/2 
     
    623623        pass 
    624624     
    625     def __call__(self, examples, learner): 
    626         res = Orange.evaluation.testing.cross_validation([learner], examples) 
     625    def __call__(self, instances, learner): 
     626        res = Orange.evaluation.testing.cross_validation([learner], instances) 
    627627        prediction_errors = get_prediction_error_list(res) 
    628628         
    629         new_domain = Orange.data.Domain(examples.domain.attributes, Orange.core.FloatVariable("pe")) 
    630         new_dataset = Orange.data.Table(new_domain, examples) 
    631          
    632         for example, prediction_error in izip(new_dataset, prediction_errors): 
    633             example.set_class(prediction_error) 
     629        new_domain = Orange.data.Domain(instances.domain.attributes, Orange.core.FloatVariable("pe")) 
     630        new_dataset = Orange.data.Table(new_domain, instances) 
     631         
     632        for instance, prediction_error in izip(new_dataset, prediction_errors): 
     633            instance.set_class(prediction_error) 
    634634         
    635635        rf = Orange.ensemble.forest.RandomForestLearner() 
     
    643643        self.new_domain = new_domain 
    644644     
    645     def __call__(self, example, predicted, probabilities): 
    646         new_example = Orange.data.Instance(self.new_domain, example) 
    647         value = self.rf_classifier(new_example, Orange.core.GetValue) 
     645    def __call__(self, instance, predicted, probabilities): 
     646        new_instance = Orange.data.Instance(self.new_domain, instance) 
     647        value = self.rf_classifier(new_instance, Orange.core.GetValue) 
    648648         
    649649        return [Estimate(value.value, SIGNED, SABIAS_SIGNED)] 
     
    685685         
    686686     
    687     def __call__(self, examples, weight=None, **kwds): 
     687    def __call__(self, instances, weight=None, **kwds): 
    688688        """Learn from the given table of data instances. 
    689689         
     
    698698        new_domain = None 
    699699         
    700         if examples.domain.class_var.var_type != Orange.data.variable.Continuous.Continuous: 
     700        if instances.domain.class_var.var_type != Orange.data.variable.Continuous.Continuous: 
    701701            raise Exception("This method only works on data with continuous class.") 
    702702         
    703         return Classifier(examples, self.box_learner, self.estimators, self.blending, new_domain, blending_classifier) 
    704      
    705     def internal_cross_validation(self, examples, folds=10): 
     703        return Classifier(instances, self.box_learner, self.estimators, self.blending, new_domain, blending_classifier) 
     704     
     705    def internal_cross_validation(self, instances, folds=10): 
    706706        """ Perform the internal cross validation for getting the best 
    707707        reliability estimate. It uses the reliability estimators defined in 
     
    710710        Returns the id of the method that scored the best. 
    711711 
    712         :param examples: Data instances to use for ICV. 
    713         :type examples: :class:`Orange.data.Table` 
     712        :param instances: Data instances to use for ICV. 
     713        :type instances: :class:`Orange.data.Table` 
    714714        :param folds: number of folds for ICV. 
    715715        :type folds: int 
     
    717717 
    718718        """ 
    719         res = Orange.evaluation.testing.cross_validation([self], examples, folds=folds) 
     719        res = Orange.evaluation.testing.cross_validation([self], instances, folds=folds) 
    720720        results = get_pearson_r(res) 
    721721        sorted_results = sorted(results) 
    722722        return sorted_results[-1][3] 
    723723     
    724     def internal_cross_validation_testing(self, examples, folds=10): 
     724    def internal_cross_validation_testing(self, instances, folds=10): 
    725725        """ Perform internal cross validation (as in Automatic selection of 
    726726        reliability estimates for individual regression predictions, 
     
    728728        that scored best on this data. 
    729729 
    730         :param examples: Data instances to use for ICV. 
    731         :type examples: :class:`Orange.data.Table` 
     730        :param instances: Data instances to use for ICV. 
     731        :type instances: :class:`Orange.data.Table` 
    732732        :param folds: number of folds for ICV. 
    733733        :type folds: int 
     
    735735 
    736736        """ 
    737         cv_indices = Orange.core.MakeRandomIndicesCV(examples, folds) 
     737        cv_indices = Orange.core.MakeRandomIndicesCV(instances, folds) 
    738738         
    739739        list_of_rs = [] 
     
    742742         
    743743        for fold in xrange(folds): 
    744             data = examples.select(cv_indices, fold) 
     744            data = instances.select(cv_indices, fold) 
    745745            if len(data) < 10: 
    746746                res = Orange.evaluation.testing.leave_one_out([self], data) 
     
    767767    """ 
    768768 
    769     def __init__(self, examples, box_learner, estimators, blending, blending_domain, rf_classifier, **kwds): 
     769    def __init__(self, instances, box_learner, estimators, blending, blending_domain, rf_classifier, **kwds): 
    770770        self.__dict__.update(kwds) 
    771         self.examples = examples 
     771        self.instances = instances 
    772772        self.box_learner = box_learner 
    773773        self.estimators = estimators 
     
    777777         
    778778        # Train the learner with original data 
    779         self.classifier = box_learner(examples) 
     779        self.classifier = box_learner(instances) 
    780780         
    781781        # Train all the estimators and create their classifiers 
    782         self.estimation_classifiers = [estimator(examples, box_learner) for estimator in estimators] 
    783      
    784     def __call__(self, example, result_type=Orange.core.GetValue): 
     782        self.estimation_classifiers = [estimator(instances, box_learner) for estimator in estimators] 
     783     
     784    def __call__(self, instance, result_type=Orange.core.GetValue): 
    785785        """ 
    786786        Classify and estimate reliability of estimation for a new instance. 
     
    802802              :class:`Orange.statistics.Distribution` or a tuple with both 
    803803        """ 
    804         predicted, probabilities = self.classifier(example, Orange.core.GetBoth) 
     804        predicted, probabilities = self.classifier(instance, Orange.core.GetBoth) 
    805805         
    806806        # Create a place holder for estimates 
     
    813813        # Calculate all the estimates and add them to the results 
    814814        for estimate in self.estimation_classifiers: 
    815             probabilities.reliability_estimate.extend(estimate(example, predicted, probabilities)) 
     815            probabilities.reliability_estimate.extend(estimate(instance, predicted, probabilities)) 
    816816         
    817817        # Return the appropriate type of result 
Note: See TracChangeset for help on using the changeset viewer.