# source:orange/Orange/evaluation/reliability.py@9724:318e91106d47

Revision 9724:318e91106d47, 30.7 KB checked in by markotoplak, 2 years ago (diff)

Renames in Orange.distance.

Line
1import Orange
2
3import random
4import statc
5import math
6import warnings
7import numpy
8
9from collections import defaultdict
10from itertools import izip
11
# Labels and final variables
labels = ["SAvar", "SAbias", "BAGV", "CNK", "LCV", "BVCK", "Mahalanobis", "ICV"]

"""
# All the estimators calculation constants
DO_SA = 0
DO_BAGV = 1
DO_CNK = 2
DO_LCV = 3
DO_BVCK = 4
DO_MAHAL = 5
"""

# All the estimator method constants
# Each Estimate carries one of these ids in its `method` attribute;
# METHOD_NAME below maps the id to a human-readable name.
SAVAR_ABSOLUTE = 0
SABIAS_SIGNED = 1
SABIAS_ABSOLUTE = 2
BAGV_ABSOLUTE = 3
CNK_SIGNED = 4
CNK_ABSOLUTE = 5
LCV_ABSOLUTE = 6
BVCK_ABSOLUTE = 7
MAHAL_ABSOLUTE = 8
BLENDING_ABSOLUTE = 9
ICV_METHOD = 10
# ids 11 and 12 are only present in METHOD_NAME (RF Variance / RF Std);
# no module-level constant is defined for them here.
MAHAL_TO_CENTER_ABSOLUTE = 13

# Type of estimator constant
# Signed estimates may be negative (e.g. SAbias, CNK); absolute ones are >= 0.
SIGNED = 0
ABSOLUTE = 1

# Names of all the estimator methods
# NOTE(review): "BVCK_absolute" uses an underscore unlike its siblings —
# looks like a typo, but downstream code may rely on the exact string.
METHOD_NAME = {0: "SAvar absolute", 1: "SAbias signed", 2: "SAbias absolute",
               3: "BAGV absolute", 4: "CNK signed", 5: "CNK absolute",
               6: "LCV absolute", 7: "BVCK_absolute", 8: "Mahalanobis absolute",
               9: "BLENDING absolute", 10: "ICV", 11: "RF Variance", 12: "RF Std",
               13: "Mahalanobis to center"}

# Shared sampler used for bootstrap sampling (sampling with replacement),
# e.g. by BaggingVariance below.
select_with_repeat = Orange.core.MakeRandomIndicesMultiple()
select_with_repeat.random_generator = Orange.misc.Random()
52
def get_reliability_estimation_list(res, i):
    """Collect the i-th reliability estimate of every tested instance.

    Returns a tuple ``(estimates, signed_or_absolute, method)`` where the
    flag and method id are taken from the first result (they are shared by
    all instances for a given estimator index).
    """
    first = res.results[0].probabilities[0].reliability_estimate[i]
    estimates = [r.probabilities[0].reliability_estimate[i].estimate
                 for r in res.results]
    return estimates, first.signed_or_absolute, first.method
55
def get_prediction_error_list(res):
    """Return the signed prediction error (actual - predicted) per result."""
    errors = []
    for r in res.results:
        errors.append(r.actualClass - r.classes[0])
    return errors
58
def get_description_list(res, i):
    """Return the textual description of the i-th estimate for every result."""
    return [r.probabilities[0].reliability_estimate[i].text_description
            for r in res.results]
61
def get_pearson_r(res):
    """
    :param res: results of evaluation, done using learners,
        wrapped into :class:Orange.evaluation.reliability.Classifier.
    :type res: :class:Orange.evaluation.testing.ExperimentResults

    Return Pearson's coefficient between the prediction error and each of
    the used reliability estimates, together with the p-value of each
    coefficient.
    """
    errors = get_prediction_error_list(res)
    n_estimates = len(res.results[0].probabilities[0].reliability_estimate)
    scores = []
    for i in xrange(n_estimates):
        estimates, signed_or_absolute, method = get_reliability_estimation_list(res, i)
        # Signed estimators correlate against the raw error, absolute ones
        # against its magnitude.
        observed = errors if signed_or_absolute == SIGNED else [abs(e) for e in errors]
        try:
            r, p = statc.pearsonr(observed, estimates)
        except Exception:
            # Degenerate input (e.g. constant estimates) — report NaN.
            r = p = float("NaN")
        scores.append((r, p, signed_or_absolute, method))
    return scores
85
def get_spearman_r(res):
    """
    :param res: results of evaluation, done using learners,
        wrapped into :class:Orange.evaluation.reliability.Classifier.
    :type res: :class:Orange.evaluation.testing.ExperimentResults

    Return Spearman's coefficient between the prediction error and each of
    the used reliability estimates, together with the p-value of each
    coefficient.
    """
    errors = get_prediction_error_list(res)
    n_estimates = len(res.results[0].probabilities[0].reliability_estimate)
    scores = []
    for i in xrange(n_estimates):
        estimates, signed_or_absolute, method = get_reliability_estimation_list(res, i)
        # Signed estimators correlate against the raw error, absolute ones
        # against its magnitude.
        observed = errors if signed_or_absolute == SIGNED else [abs(e) for e in errors]
        try:
            r, p = statc.spearmanr(observed, estimates)
        except Exception:
            # Degenerate input (e.g. constant estimates) — report NaN.
            r = p = float("NaN")
        scores.append((r, p, signed_or_absolute, method))
    return scores
109
def get_pearson_r_by_iterations(res):
    """
    :param res: results of evaluation, done using learners,
        wrapped into :class:Orange.evaluation.reliability.Classifier.
    :type res: :class:Orange.evaluation.testing.ExperimentResults

    Return the average Pearson's coefficient over all folds between the
    prediction error and each of the used estimates.
    """
    folds = Orange.evaluation.scoring.split_by_iterations(res)
    n_estimates = len(res.results[0].probabilities[0].reliability_estimate)
    n_instances = len(res.results)
    n_folds = len(folds)
    r_sums = [0] * n_estimates
    signs = [0] * n_estimates
    methods = [0] * n_estimates

    for fold_res in folds:
        errors = get_prediction_error_list(fold_res)
        abs_errors = [abs(e) for e in errors]
        for i in xrange(n_estimates):
            estimates, signed_or_absolute, method = get_reliability_estimation_list(fold_res, i)
            try:
                if signed_or_absolute == SIGNED:
                    r, _ = statc.pearsonr(errors, estimates)
                else:
                    r, _ = statc.pearsonr(abs_errors, estimates)
            except Exception:
                r = float("NaN")
            r_sums[i] += r
            signs[i] = signed_or_absolute
            methods[i] = method

    # Average over folds, then derive p-values from the mean coefficients.
    avg_rs = [float(s) / n_folds for s in r_sums]
    ps = [p_value_from_r(r, n_instances) for r in avg_rs]

    return zip(avg_rs, ps, signs, methods)
147
def p_value_from_r(r, n):
    """
    Compute the p-value of a Pearson correlation coefficient ``r``
    obtained from a sample of size ``n`` (via the incomplete beta function).
    """
    df = n - 2
    # The 1e-30 terms guard against division by zero when |r| == 1.
    t = r * (df / ((1.0 - r + 1e-30) * (1.0 + r + 1e-30))) ** 0.5
    return statc.betai(df * 0.5, 0.5, df / (df + t * t))
155
class Estimate:
    """
    Reliability estimate. Contains attributes that describe the result of
    reliability estimation.

    .. attribute:: estimate

        A numerical reliability estimate.

    .. attribute:: signed_or_absolute

        Whether the method gives a signed or absolute result; one of
        :obj:SIGNED or :obj:ABSOLUTE.

    .. attribute:: method

        Integer id of the reliability estimation method used.

    .. attribute:: method_name

        Name (string) of the reliability estimation method used.

    .. attribute:: icv_method

        Integer id of the method that performed best, as determined by ICV,
        whose estimate is stored in :obj:estimate. (:obj:None when ICV
        was not used.)

    .. attribute:: icv_method_name

        Name (string) of the method that performed best, as determined by
        ICV. (:obj:None when ICV was not used.)

    """
    def __init__(self, estimate, signed_or_absolute, method, icv_method=-1):
        self.estimate = estimate
        self.signed_or_absolute = signed_or_absolute
        self.method = method
        self.method_name = METHOD_NAME[method]
        self.icv_method = icv_method
        # -1 marks "ICV not used"; any other id is resolved to its name.
        if icv_method == -1:
            self.icv_method_name = ""
        else:
            self.icv_method_name = METHOD_NAME[icv_method]
        self.text_description = None
198
class DescriptiveAnalysis:
    """Wrap an estimator so its numeric estimates also get textual labels.

    Percentile borders for the labels in ``desc`` are derived from an
    internal cross validation on the training data.
    """
    def __init__(self, estimator, desc=["high", "medium", "low"], procentage=[0.00, 0.33, 0.66]):
        self.desc = desc
        self.procentage = procentage
        self.estimator = estimator

    def __call__(self, instances, weight=None, **kwds):
        # Estimate the distribution of reliability scores via cross
        # validation and derive one list of percentile borders per estimator.
        res = Orange.evaluation.testing.cross_validation([self.estimator], instances)
        all_borders = []
        n_estimates = len(res.results[0].probabilities[0].reliability_estimate)
        for i in xrange(n_estimates):
            estimates, _, _ = get_reliability_estimation_list(res, i)
            ranked = sorted(abs(e) for e in estimates)
            # NOTE(review): for p == 0.0 the index is -1, i.e. the *largest*
            # value; this is masked downstream because the classifier assigns
            # desc[0] as the default description — confirm before reuse.
            borders = [ranked[int(len(estimates) * p) - 1] for p in self.procentage]
            all_borders.append(borders)

        # Fit the wrapped estimator on the complete training data.
        fitted = self.estimator(instances)

        return DescriptiveAnalysisClassifier(fitted, all_borders, self.desc)
220
class DescriptiveAnalysisClassifier:
    """Classifier counterpart of DescriptiveAnalysis: attaches a textual
    description to every reliability estimate it returns."""
    def __init__(self, estimator_classifier, all_borders, desc):
        self.estimator_classifier = estimator_classifier
        self.all_borders = all_borders
        self.desc = desc

    def __call__(self, instance, result_type=Orange.core.GetValue):
        predicted, probabilities = self.estimator_classifier(instance, Orange.core.GetBoth)

        for borders, estimate in zip(self.all_borders, probabilities.reliability_estimate):
            # Start from the first label, then upgrade it for every border
            # the estimate reaches.
            estimate.text_description = self.desc[0]
            for border, label in zip(borders, self.desc):
                if estimate.estimate >= border:
                    estimate.text_description = label

        # Return the appropriate type of result.
        if result_type == Orange.core.GetValue:
            return predicted
        if result_type == Orange.core.GetProbabilities:
            return probabilities
        return predicted, probabilities
243
class SensitivityAnalysis:
    """

    :param e: List of possible :math:\epsilon values for SAvar and SAbias
        reliability estimates.
    :type e: list of floats

    :rtype: :class:Orange.evaluation.reliability.SensitivityAnalysisClassifier

    To estimate the reliability of a prediction for a given instance, the
    learning set is extended with that instance, labeled with
    :math:K + \epsilon (l_{max} - l_{min}), where :math:K is the initial
    prediction, :math:\epsilon is the sensitivity parameter and
    :math:l_{min}, :math:l_{max} are the lower and upper bounds of the
    learning instances' labels. The sensitivity predictions obtained with
    different :math:\epsilon values are combined into SAvar and SAbias;
    SAbias can be used in a signed or absolute form.

    :math:SAvar = \\frac{\sum_{\epsilon \in E}(K_{\epsilon} - K_{-\epsilon})}{|E|}

    :math:SAbias = \\frac{\sum_{\epsilon \in E} (K_{\epsilon} - K ) + (K_{-\epsilon} - K)}{2 |E|}

    """
    def __init__(self, e=[0.01, 0.1, 0.5, 1.0, 2.0]):
        self.e = e

    def __call__(self, instances, learner):
        # Label bounds over the training data, via the builtins.
        class_values = [ex.getclass().value for ex in instances]
        return SensitivityAnalysisClassifier(self.e, instances,
                                             min(class_values),
                                             max(class_values), learner)
280
class SensitivityAnalysisClassifier:
    """Computes the SAvar and SAbias sensitivity estimates for one instance."""
    def __init__(self, e, instances, min_value, max_value, learner):
        self.e = e
        self.instances = instances
        self.max_value = max_value
        self.min_value = min_value
        self.learner = learner

    def __call__(self, instance, predicted, probabilities):
        # Copy the training data and append the instance to be probed.
        extended = Orange.data.Table(self.instances)
        extended.append(Orange.data.Instance(instance))

        span = self.max_value - self.min_value
        SAvar = SAbias = 0

        for eps in self.e:
            # Relabel the appended instance with prediction + epsilon*span
            # and retrain to see how the prediction moves.
            extended[-1].setclass(predicted.value + eps * span)
            k_plus = self.learner(extended)(instance, Orange.core.GetValue)

            # Same with prediction - epsilon*span.
            extended[-1].setclass(predicted.value - eps * span)
            k_minus = self.learner(extended)(instance, Orange.core.GetValue)

            # Accumulate the per-epsilon contributions.
            SAvar += k_plus.value - k_minus.value
            SAbias += k_plus.value + k_minus.value - 2 * predicted.value

        SAvar /= len(self.e)
        SAbias /= 2 * len(self.e)

        return [Estimate(SAvar, ABSOLUTE, SAVAR_ABSOLUTE),
                Estimate(SAbias, SIGNED, SABIAS_SIGNED),
                Estimate(abs(SAbias), ABSOLUTE, SABIAS_ABSOLUTE)]
326
class BaggingVariance:
    """

    :param m: Number of bagging models to be used with BAGV estimate
    :type m: int

    :rtype: :class:Orange.evaluation.reliability.BaggingVarianceClassifier

    :math:m different bagging models are constructed and used to estimate
    the value of the dependent variable for a given instance. The variance
    of those predictions is the reliability estimate:

    :math:BAGV = \\frac{1}{m} \sum_{i=1}^{m} (K_i - K)^2

    where :math:K = \\frac{\sum_{i=1}^{m} K_i}{m} and :math:K_i are the
    predictions of the individual models.

    """
    def __init__(self, m=50):
        self.m = m

    def __call__(self, instances, learner):
        # Train m models, each on a bootstrap sample (with replacement).
        classifiers = [learner(instances.select(select_with_repeat(len(instances))))
                       for _ in xrange(self.m)]
        return BaggingVarianceClassifier(classifiers)
357
class BaggingVarianceClassifier:
    """Returns the variance of the bagged models' predictions (BAGV)."""
    def __init__(self, classifiers):
        self.classifiers = classifiers

    def __call__(self, instance, *args):
        # Collect every model's prediction for this instance.
        predictions = [c(instance, Orange.core.GetValue).value
                       for c in self.classifiers if c is not None]

        mean = sum(predictions) / len(predictions)
        variance = sum((p - mean) ** 2 for p in predictions) / len(predictions)

        return [Estimate(variance, ABSOLUTE, BAGV_ABSOLUTE)]
373
class LocalCrossValidation:
    """

    :param k: Number of nearest neighbours used in LCV estimate
    :type k: int

    :rtype: :class:Orange.evaluation.reliability.LocalCrossValidationClassifier

    :math:k nearest neighbours to the given instance are found and put in
    a separate data set. On this data set, a leave-one-out validation is
    performed. Reliability estimate is then the distance weighted absolute
    prediction error.

    If a special value 0 is passed as :math:k (as is by default),
    it is set as 1/20 of data set size (or 5, whichever is greater).

    1. Determine the set of k nearest neighours :math:N = { (x_1, c_1),...,
       (x_k, c_k)}.
    2. On this set, compute leave-one-out predictions :math:K_i and
       prediction errors :math:E_i = | C_i - K_i |.
    3. :math:LCV(x) = \\frac{ \sum_{(x_i, c_i) \in N} d(x_i, x) * E_i }{ \sum_{(x_i, c_i) \in N} d(x_i, x) }

    """
    def __init__(self, k=0):
        self.k = k

    def __call__(self, instances, learner):
        nearest_neighbours_constructor = Orange.classification.knn.FindNearestConstructor()
        nearest_neighbours_constructor.distanceConstructor = Orange.distance.Euclidean()

        distance_id = Orange.data.new_meta_id()
        nearest_neighbours = nearest_neighbours_constructor(instances, 0, distance_id)

        # Resolve the special value 0 locally instead of assigning self.k:
        # the original wrote `self.k = max(5, len(instances)/20)`, so the
        # first data set seen permanently overrode the configured default
        # for every later call of this (reusable) learner.
        k = self.k
        if k == 0:
            k = max(5, len(instances) / 20)

        return LocalCrossValidationClassifier(distance_id, nearest_neighbours, k, learner)
411
class LocalCrossValidationClassifier:
    """Distance-weighted leave-one-out error over the k nearest neighbours."""
    def __init__(self, distance_id, nearest_neighbours, k, learner):
        self.distance_id = distance_id
        self.nearest_neighbours = nearest_neighbours
        self.k = k
        self.learner = learner

    def __call__(self, instance, *args):
        # k nearest neighbours of the queried instance.
        neighbours = [ex for ex in self.nearest_neighbours(instance, self.k)]

        weighted_error = 0
        weight_sum = 0

        # Leave-one-out over the neighbourhood.
        for i in xrange(len(neighbours)):
            held_out = neighbours[i]
            rest = neighbours[:i] + neighbours[i + 1:]

            model = self.learner(Orange.data.Table(rest))
            prediction = model(held_out, Orange.core.GetValue)

            error = abs(held_out.getclass().value - prediction.value)
            # Closer neighbours weigh more (exp of negative distance).
            weight = math.exp(-held_out[self.distance_id])

            weighted_error += error * weight
            weight_sum += weight

        LCV = weighted_error / weight_sum if weight_sum != 0 else 0
        if math.isnan(LCV):
            LCV = 0.0
        return [Estimate(LCV, ABSOLUTE, LCV_ABSOLUTE)]
445
class CNeighbours:
    """

    :param k: Number of nearest neighbours used in CNK estimate
    :type k: int

    :rtype: :class:Orange.evaluation.reliability.CNeighboursClassifier

    CNK is defined for an unlabeled instance as the difference between the
    average label of its nearest neighbours and its prediction. CNK can be
    used as a signed or absolute estimate.

    :math:CNK = \\frac{\sum_{i=1}^{k}C_i}{k} - K

    where :math:k is the number of neighbours, C :sub:i are the
    neighbours' labels and :math:K is the instance's prediction.

    """
    def __init__(self, k=5):
        self.k = k

    def __call__(self, instances, learner):
        finder = Orange.classification.knn.FindNearestConstructor()
        finder.distanceConstructor = Orange.distance.Euclidean()

        distance_id = Orange.data.new_meta_id()
        return CNeighboursClassifier(finder(instances, 0, distance_id), self.k)
474
class CNeighboursClassifier:
    """Signed/absolute gap between the neighbours' mean label and the prediction."""
    def __init__(self, nearest_neighbours, k):
        self.nearest_neighbours = nearest_neighbours
        self.k = k

    def __call__(self, instance, predicted, probabilities):
        # Mean class value over the k nearest neighbours, minus the prediction.
        label_sum = 0
        for neighbour in self.nearest_neighbours(instance, self.k):
            label_sum += neighbour.getclass().value

        CNK = label_sum / self.k - predicted.value

        return [Estimate(CNK, SIGNED, CNK_SIGNED),
                Estimate(abs(CNK), ABSOLUTE, CNK_ABSOLUTE)]
496
class Mahalanobis:
    """

    :param k: Number of nearest neighbours used in Mahalanobis estimate.
    :type k: int

    :rtype: :class:Orange.evaluation.reliability.MahalanobisClassifier

    The Mahalanobis-distance reliability estimate is defined as the
    mahalanobis distance <http://en.wikipedia.org/wiki/Mahalanobis_distance>_
    to the evaluated instance's :math:k nearest neighbours.

    """
    def __init__(self, k=3):
        self.k = k

    def __call__(self, instances, *args):
        finder = Orange.classification.knn.FindNearestConstructor()
        finder.distanceConstructor = Orange.distance.Mahalanobis()

        mid = Orange.data.new_meta_id()
        return MahalanobisClassifier(self.k, finder(instances, 0, mid), mid)
521
class MahalanobisClassifier:
    """Sums Mahalanobis distances to the k nearest neighbours of an instance."""
    def __init__(self, k, nnm, mid):
        self.k = k
        self.nnm = nnm
        self.mid = mid

    def __call__(self, instance, *args):
        # Distances are stored under the meta id assigned by the learner.
        total = 0
        for neighbour in self.nnm(instance, self.k):
            total += neighbour[self.mid].value

        return [Estimate(total, ABSOLUTE, MAHAL_ABSOLUTE)]
534
class MahalanobisToCenter:
    """
    :rtype: :class:Orange.evaluation.reliability.MahalanobisToCenterClassifier

    The Mahalanobis-distance-to-center reliability estimate is the
    mahalanobis distance <http://en.wikipedia.org/wiki/Mahalanobis_distance>_
    between the predicted instance and the centroid of the data.

    """
    def __init__(self):
        pass

    def __call__(self, instances, *args):
        # Continuize the domain: ignore the class, normalize continuous
        # features by span, expand multinomial features to N values.
        dc = Orange.core.DomainContinuizer()
        dc.classTreatment = Orange.core.DomainContinuizer.Ignore
        dc.continuousTreatment = Orange.core.DomainContinuizer.NormalizeBySpan
        dc.multinomialTreatment = Orange.core.DomainContinuizer.NValues

        new_domain = dc(instances)
        new_instances = instances.translate(new_domain)

        # Feature-wise mean of the continuized data.
        X, _, _ = new_instances.to_numpy()
        feature_means = numpy.average(X, 0)

        distance = Orange.distance.Mahalanobis()(new_instances)

        # Centroid instance; the class value is left unknown ("?").
        centroid = Orange.data.Instance(new_instances.domain, list(feature_means) + ["?"])

        return MahalanobisToCenterClassifier(distance, centroid, new_domain)
566
class MahalanobisToCenterClassifier:
    """Computes the Mahalanobis distance from an instance to the data centroid."""
    def __init__(self, distance, average_instance, new_domain):
        self.distance = distance
        self.average_instance = average_instance
        self.new_domain = new_domain

    def __call__(self, instance, *args):
        # Convert into the continuized domain before measuring the distance.
        converted = Orange.data.Instance(self.new_domain, instance)
        d = self.distance(converted, self.average_instance)
        return [Estimate(d, ABSOLUTE, MAHAL_TO_CENTER_ABSOLUTE)]
580
581
class BaggingVarianceCNeighbours:
    """

    :param bagv: Instance of Bagging Variance estimator.
    :type bagv: :class:Orange.evaluation.reliability.BaggingVariance

    :param cnk: Instance of CNK estimator.
    :type cnk: :class:Orange.evaluation.reliability.CNeighbours

    :rtype: :class:Orange.evaluation.reliability.BaggingVarianceCNeighboursClassifier

    BVCK is a combination (average) of Bagging variance and local modeling
    of prediction error.

    """
    def __init__(self, bagv=BaggingVariance(), cnk=CNeighbours()):
        self.bagv = bagv
        self.cnk = cnk

    def __call__(self, instances, learner):
        # Train both component estimators and combine them in the classifier.
        return BaggingVarianceCNeighboursClassifier(self.bagv(instances, learner),
                                                    self.cnk(instances, learner))
605
class BaggingVarianceCNeighboursClassifier:
    """Averages BAGV and absolute CNK into the BVCK estimate, and also
    returns both component estimates."""
    def __init__(self, bagv_classifier, cnk_classifier):
        self.bagv_classifier = bagv_classifier
        self.cnk_classifier = cnk_classifier

    def __call__(self, instance, predicted, probabilities):
        bagv_estimates = self.bagv_classifier(instance, predicted, probabilities)
        cnk_estimates = self.cnk_classifier(instance, predicted, probabilities)

        # cnk_estimates[1] is the absolute CNK variant.
        combined = (bagv_estimates[0].estimate + cnk_estimates[1].estimate) / 2
        return [Estimate(combined, ABSOLUTE, BVCK_ABSOLUTE)] + bagv_estimates + cnk_estimates
620
class ErrorPredicting:
    """Trains a random forest that predicts the wrapped learner's own
    prediction error (estimated with cross validation)."""
    def __init__(self):
        pass

    def __call__(self, instances, learner):
        res = Orange.evaluation.testing.cross_validation([learner], instances)
        prediction_errors = get_prediction_error_list(res)

        # Replace the class with a continuous "pe" (prediction error)
        # variable and label every instance with its CV error.
        new_domain = Orange.data.Domain(instances.domain.attributes, Orange.core.FloatVariable("pe"))
        new_dataset = Orange.data.Table(new_domain, instances)
        for inst, err in izip(new_dataset, prediction_errors):
            inst.set_class(err)

        rf_classifier = Orange.ensemble.forest.RandomForestLearner()(new_dataset)

        return ErrorPredictingClassification(rf_classifier, new_domain)
639
class ErrorPredictingClassification:
    """Returns the error-model's predicted signed error for an instance."""
    def __init__(self, rf_classifier, new_domain):
        self.rf_classifier = rf_classifier
        self.new_domain = new_domain

    def __call__(self, instance, predicted, probabilities):
        converted = Orange.data.Instance(self.new_domain, instance)
        predicted_error = self.rf_classifier(converted, Orange.core.GetValue)
        return [Estimate(predicted_error.value, SIGNED, SABIAS_SIGNED)]
650
class Learner:
    """
    Reliability estimation wrapper around a learner we want to test.
    Different reliability estimation algorithms can be used on the
    chosen learner. This learner works as any other and can be used as one,
    but it returns the classifier, wrapped into an instance of
    :class:Orange.evaluation.reliability.Classifier.

    :param box_learner: Learner we want to wrap into a reliability estimation
        classifier.
    :type box_learner: learner

    :param estimators: List of different reliability estimation methods we
                       want to use on the chosen learner.
    :type estimators: list of reliability estimators

    :param name: Name of this reliability learner
    :type name: string

    :rtype: :class:Orange.evaluation.reliability.Learner
    """
    # NOTE(review): the default `estimators` list is a mutable default that
    # is shared by every Learner created without an explicit list; the
    # estimator instances in it are constructed once at class-definition
    # time.
    def __init__(self, box_learner, name="Reliability estimation",
                 estimators = [SensitivityAnalysis(),
                               LocalCrossValidation(),
                               BaggingVarianceCNeighbours(),
                               Mahalanobis(),
                               MahalanobisToCenter()
                               ],
                 **kwds):
        # Extra keyword arguments become attributes of the learner.
        self.__dict__.update(kwds)
        self.name = name
        self.estimators = estimators
        self.box_learner = box_learner
        # Blending is disabled here; the placeholders below are passed to
        # Classifier unchanged.
        self.blending = False


    def __call__(self, instances, weight=None, **kwds):
        """Learn from the given table of data instances.

        :param instances: Data instances to learn from.
        :type instances: Orange.data.Table
        :param weight: Id of meta attribute with weights of instances
        :type weight: integer
        :rtype: :class:Orange.evaluation.reliability.Classifier
        """

        blending_classifier = None
        new_domain = None

        # Reliability estimation is regression-only: reject discrete classes.
        # NOTE(review): the comparison target is
        # Orange.data.variable.Continuous.Continuous — verify this attribute
        # path against the Orange version in use.
        if instances.domain.class_var.var_type != Orange.data.variable.Continuous.Continuous:
            raise Exception("This method only works on data with continuous class.")

        return Classifier(instances, self.box_learner, self.estimators, self.blending, new_domain, blending_classifier)

    def internal_cross_validation(self, instances, folds=10):
        """ Perform the internal cross validation for getting the best
        reliability estimate. It uses the reliability estimators defined in
        estimators attribute.

        Returns the id of the method that scored the best.

        :param instances: Data instances to use for ICV.
        :type instances: :class:Orange.data.Table
        :param folds: number of folds for ICV.
        :type folds: int
        :rtype: int

        """
        res = Orange.evaluation.testing.cross_validation([self], instances, folds=folds)
        results = get_pearson_r(res)
        # Tuples are (r, p, signed_or_absolute, method); sorting them picks
        # the largest correlation coefficient, and [3] is its method id.
        sorted_results = sorted(results)
        return sorted_results[-1][3]

    def internal_cross_validation_testing(self, instances, folds=10):
        """ Perform internal cross validation (as in Automatic selection of
        reliability estimates for individual regression predictions,
        Zoran Bosnic, 2010) and return id of the method
        that scored best on this data.

        :param instances: Data instances to use for ICV.
        :type instances: :class:Orange.data.Table
        :param folds: number of folds for ICV.
        :type folds: int
        :rtype: int

        """
        cv_indices = Orange.core.MakeRandomIndicesCV(instances, folds)

        # NOTE(review): list_of_rs is never used in this method.
        list_of_rs = []

        # Sum of Pearson r per method id, accumulated over the outer folds.
        sum_of_rs = defaultdict(float)

        for fold in xrange(folds):
            data = instances.select(cv_indices, fold)
            # Small folds get leave-one-out instead of an inner CV.
            if len(data) < 10:
                res = Orange.evaluation.testing.leave_one_out([self], data)
            else:
                res = Orange.evaluation.testing.cross_validation([self], data)
            results = get_pearson_r(res)
            for r, _, _, method in results:
                sum_of_rs[method] += r
        # Highest summed correlation wins; return its method id.
        sorted_sum_of_rs = sorted(sum_of_rs.items(), key=lambda estimate: estimate[1], reverse=True)
        return sorted_sum_of_rs[0][0]

    # Class-level copy of the module-level `labels` list.
    labels = ["SAvar", "SAbias", "BAGV", "CNK", "LCV", "BVCK", "Mahalanobis", "ICV"]
756
class Classifier:
    """
    A reliability estimation wrapper for classifiers.

    What distinguishes this classifier is that the returned probabilities (if
    :obj:Orange.classification.Classifier.GetProbabilities or
    :obj:Orange.classification.Classifier.GetBoth is passed) contain an
    additional attribute :obj:reliability_estimate, which is an instance of
    :class:~Orange.evaluation.reliability.Estimate.

    """

    def __init__(self, instances, box_learner, estimators, blending, blending_domain, rf_classifier, **kwds):
        # Extra keyword arguments become attributes of the classifier.
        self.__dict__.update(kwds)
        self.instances = instances
        self.box_learner = box_learner
        self.estimators = estimators
        # Blending-related arguments are stored but not otherwise used here.
        self.blending = blending
        self.blending_domain = blending_domain
        self.rf_classifier = rf_classifier

        # Train the learner with original data
        self.classifier = box_learner(instances)

        # Train all the estimators and create their classifiers
        self.estimation_classifiers = [estimator(instances, box_learner) for estimator in estimators]

    def __call__(self, instance, result_type=Orange.core.GetValue):
        """
        Classify and estimate reliability of estimation for a new instance.
        When :obj:result_type is set to
        :obj:Orange.classification.Classifier.GetBoth or
        :obj:Orange.classification.Classifier.GetProbabilities,
        an additional attribute :obj:reliability_estimate,
        which is an instance of
        :class:~Orange.evaluation.reliability.Estimate,
        is added to the distribution object.

        :param instance: instance to be classified.
        :type instance: :class:Orange.data.Instance
        :param result_type: :class:Orange.classification.Classifier.GetValue or \
              :class:Orange.classification.Classifier.GetProbabilities or
              :class:Orange.classification.Classifier.GetBoth

        :rtype: :class:Orange.data.Value,
              :class:Orange.statistics.Distribution or a tuple with both
        """
        predicted, probabilities = self.classifier(instance, Orange.core.GetBoth)

        # Create a place holder for estimates; regression classifiers may
        # return no distribution at all.
        if probabilities is None:
            probabilities = Orange.statistics.distribution.Continuous()
        #with warnings.catch_warnings():
        #    warnings.simplefilter("ignore")
        probabilities.setattr('reliability_estimate', [])

        # Calculate all the estimates and add them to the results; each
        # estimation classifier returns a list of Estimate objects.
        for estimate in self.estimation_classifiers:
            probabilities.reliability_estimate.extend(estimate(instance, predicted, probabilities))

        # Return the appropriate type of result
        if result_type == Orange.core.GetValue:
            return predicted
        elif result_type == Orange.core.GetProbabilities:
            return probabilities
        else:
            return predicted, probabilities
Note: See TracBrowser for help on using the repository browser.