source: orange-reliability/_reliability/__init__.py @ 0:55e4bdcfe4e3

Revision 0:55e4bdcfe4e3, 30.1 KB checked in by Matija Polajnar <matija.polajnar@…>, 23 months ago (diff)

Initial version as moved from main Orange. Without documentation.

Line 
1import Orange
2
3import random
4from Orange import statc
5import math
6import warnings
7import numpy
8
9from collections import defaultdict
10from itertools import izip
11
# Labels and final variables
# Short human-readable labels for the estimator families, kept at module
# level so callers can refer to them without instantiating a Learner.
labels = ["SAvar", "SAbias", "BAGV", "CNK", "LCV", "BVCK", "Mahalanobis", "ICV"]

"""
# All the estimators calculation constants
DO_SA = 0
DO_BAGV = 1
DO_CNK = 2
DO_LCV = 3
DO_BVCK = 4
DO_MAHAL = 5
"""

# All the estimator method constants
# Integer IDs identifying each reliability estimation method.  These values
# are stored in Estimate.method and used as keys of METHOD_NAME below.
SAVAR_ABSOLUTE = 0
SABIAS_SIGNED = 1
SABIAS_ABSOLUTE = 2
BAGV_ABSOLUTE = 3
CNK_SIGNED = 4
CNK_ABSOLUTE = 5
LCV_ABSOLUTE = 6
BVCK_ABSOLUTE = 7
MAHAL_ABSOLUTE = 8
BLENDING_ABSOLUTE = 9
ICV_METHOD = 10
# NOTE(review): IDs 11 and 12 ("RF Variance"/"RF Std" in METHOD_NAME) have no
# constant here; 13 appears to skip them deliberately -- confirm.
MAHAL_TO_CENTER_ABSOLUTE = 13

# Type of estimator constant
# Whether an estimate carries a sign (direction of the error) or only a
# magnitude; stored in Estimate.signed_or_absolute.
SIGNED = 0
ABSOLUTE = 1

# Names of all the estimator methods
# Maps a method ID to its display name.  NOTE(review): entry 7 reads
# "BVCK_absolute" with an underscore, unlike its siblings -- possibly a typo,
# but it is user-visible text so it is left untouched here.
METHOD_NAME = {0: "SAvar absolute", 1: "SAbias signed", 2: "SAbias absolute",
               3: "BAGV absolute", 4: "CNK signed", 5: "CNK absolute",
               6: "LCV absolute", 7: "BVCK_absolute", 8: "Mahalanobis absolute",
               9: "BLENDING absolute", 10: "ICV", 11: "RF Variance", 12: "RF Std",
               13: "Mahalanobis to center"}

# Module-wide sampler used by BaggingVariance for sampling with replacement.
select_with_repeat = Orange.core.MakeRandomIndicesMultiple()
select_with_repeat.random_generator = Orange.misc.Random()
52
def get_reliability_estimation_list(res, i):
    """
    Collect the i-th reliability estimate of every tested instance.

    Returns a tuple ``(estimates, signed_or_absolute, method)`` where
    ``estimates`` is a list of numeric estimate values, one per result, and
    the other two fields are taken from the first result (all results share
    the same estimator at a given index).
    """
    estimates = [r.probabilities[0].reliability_estimate[i].estimate
                 for r in res.results]
    first = res.results[0].probabilities[0].reliability_estimate[i]
    return estimates, first.signed_or_absolute, first.method
55
def get_prediction_error_list(res):
    """
    Return the signed prediction error (actual minus predicted class value)
    for every tested instance in *res*.
    """
    errors = []
    for result in res.results:
        errors.append(result.actual_class - result.classes[0])
    return errors
58
def get_description_list(res, i):
    """
    Return the textual description of the i-th reliability estimate for
    every tested instance in *res*.
    """
    descriptions = []
    for result in res.results:
        descriptions.append(result.probabilities[0].reliability_estimate[i].text_description)
    return descriptions
61
def get_pearson_r(res):
    """
    :param res: results of evaluation, done using learners,
        wrapped into :class:`Orange.evaluation.reliability.Classifier`.
    :type res: :class:`Orange.evaluation.testing.ExperimentResults`

    Compute Pearson's correlation between the prediction error and each
    reliability estimate, together with its p-value.  Signed estimates are
    correlated with the signed error, absolute ones with its magnitude.
    Returns a list of ``(r, p, signed_or_absolute, method)`` tuples.
    """
    errors = get_prediction_error_list(res)
    scores = []
    n_estimates = len(res.results[0].probabilities[0].reliability_estimate)
    for i in xrange(n_estimates):
        estimates, signed_or_absolute, method = get_reliability_estimation_list(res, i)
        try:
            if signed_or_absolute == SIGNED:
                r, p = statc.pearsonr(errors, estimates)
            else:
                r, p = statc.pearsonr(map(abs, errors), estimates)
        except Exception:
            # Degenerate input (e.g. constant estimates) -- report NaN.
            r = p = float("NaN")
        scores.append((r, p, signed_or_absolute, method))
    return scores
85
def get_spearman_r(res):
    """
    :param res: results of evaluation, done using learners,
        wrapped into :class:`Orange.evaluation.reliability.Classifier`.
    :type res: :class:`Orange.evaluation.testing.ExperimentResults`

    Compute Spearman's rank correlation between the prediction error and each
    reliability estimate, together with its p-value.  Signed estimates are
    correlated with the signed error, absolute ones with its magnitude.
    Returns a list of ``(r, p, signed_or_absolute, method)`` tuples.
    """
    errors = get_prediction_error_list(res)
    scores = []
    n_estimates = len(res.results[0].probabilities[0].reliability_estimate)
    for i in xrange(n_estimates):
        estimates, signed_or_absolute, method = get_reliability_estimation_list(res, i)
        try:
            if signed_or_absolute == SIGNED:
                r, p = statc.spearmanr(errors, estimates)
            else:
                r, p = statc.spearmanr(map(abs, errors), estimates)
        except Exception:
            # Degenerate input (e.g. constant estimates) -- report NaN.
            r = p = float("NaN")
        scores.append((r, p, signed_or_absolute, method))
    return scores
109
def get_pearson_r_by_iterations(res):
    """
    :param res: results of evaluation, done using learners,
        wrapped into :class:`Orange.evaluation.reliability.Classifier`.
    :type res: :class:`Orange.evaluation.testing.ExperimentResults`

    Compute Pearson's correlation per cross-validation fold and average it
    over the folds for each estimator; p-values are then derived from the
    averaged coefficients and the total number of instances.
    """
    folds = Orange.evaluation.scoring.split_by_iterations(res)
    n_estimates = len(res.results[0].probabilities[0].reliability_estimate)
    n_instances = len(res.results)
    n_folds = len(folds)
    r_sums = [0 for _ in xrange(n_estimates)]
    sig = [0 for _ in xrange(n_estimates)]
    methods = [0 for _ in xrange(n_estimates)]

    for fold_res in folds:
        errors = get_prediction_error_list(fold_res)
        for i in xrange(n_estimates):
            estimates, signed_or_absolute, method = get_reliability_estimation_list(fold_res, i)
            try:
                if signed_or_absolute == SIGNED:
                    r, _ = statc.pearsonr(errors, estimates)
                else:
                    r, _ = statc.pearsonr([abs(e) for e in errors], estimates)
            except Exception:
                r = float("NaN")
            r_sums[i] += r
            sig[i] = signed_or_absolute
            methods[i] = method

    # Average over folds, then derive p-values from the averaged r.
    avg_rs = [float(s) / n_folds for s in r_sums]
    ps = [p_value_from_r(r, n_instances) for r in avg_rs]

    return zip(avg_rs, ps, sig, methods)
147
def p_value_from_r(r, n):
    """
    Calculate the two-tailed p-value from the Pearson coefficient and the
    sample size, via a t-test with ``n - 2`` degrees of freedom.

    :param r: Pearson correlation coefficient.
    :param n: sample size used to compute ``r``.
    """
    df = n - 2
    # t statistic for testing r != 0; the 1e-30 terms guard against division
    # by zero when r is exactly +/-1.
    t = r * (df / ((-r + 1.0 + 1e-30) * (r + 1.0 + 1e-30))) ** 0.5
    # Regularized incomplete beta function gives the two-sided tail probability.
    return statc.betai (df * 0.5, 0.5, df / (df + t * t))
155
class Estimate:
    """
    A single reliability estimate, together with metadata describing how it
    was computed.

    .. attribute:: estimate

        The numeric value of the reliability estimate.

    .. attribute:: signed_or_absolute

        :obj:`SIGNED` if the method produces a signed value (direction of the
        expected error), :obj:`ABSOLUTE` if it produces a magnitude only.

    .. attribute:: method

        Integer ID of the estimation method used (see the module-level
        method constants).

    .. attribute:: method_name

        Display name of the estimation method, looked up in METHOD_NAME.

    .. attribute:: icv_method

        Integer ID of the method that internal cross-validation (ICV) chose
        as best, whose value is stored in :obj:`estimate`.  When ICV was not
        used this is -1 and :obj:`icv_method_name` is the empty string.

    .. attribute:: icv_method_name

        Display name of the ICV-selected method, or "" when ICV was not used.

    .. attribute:: text_description

        Optional textual label (e.g. "high"), filled in by
        :class:`DescriptiveAnalysisClassifier`; :obj:`None` otherwise.
    """
    def __init__(self, estimate, signed_or_absolute, method, icv_method= -1):
        self.estimate = estimate
        self.signed_or_absolute = signed_or_absolute
        self.method = method
        self.method_name = METHOD_NAME[method]
        self.icv_method = icv_method
        self.icv_method_name = "" if icv_method == -1 else METHOD_NAME[icv_method]
        self.text_description = None
198
class DescriptiveAnalysis:
    """
    Wrap a reliability estimation learner so that every numeric estimate is
    additionally labeled with a textual description.

    :param estimator: a (reliability-wrapped) learner to analyse.
    :param desc: textual labels, from the label for the lowest estimates up.
    :param procentage: lower quantile bounds (fractions of the sorted
        absolute estimates) at which each label in ``desc`` starts to apply.
    """
    def __init__(self, estimator, desc=None, procentage=None):
        # Build defaults per instance: the original used mutable default
        # arguments, so all DescriptiveAnalysis objects created with defaults
        # shared the same two lists.
        self.desc = desc if desc is not None else ["high", "medium", "low"]
        self.procentage = procentage if procentage is not None else [0.00, 0.33, 0.66]
        self.estimator = estimator

    def __call__(self, instances, weight=None, **kwds):
        """Estimate label borders via cross-validation, then train on all data."""

        # Calculate borders using cross validation
        res = Orange.evaluation.testing.cross_validation([self.estimator], instances)
        all_borders = []
        for i in xrange(len(res.results[0].probabilities[0].reliability_estimate)):
            estimates, signed_or_absolute, method = get_reliability_estimation_list(res, i)
            sorted_estimates = sorted(abs(x) for x in estimates)
            # Quantile borders of the absolute estimates for this method.
            borders = [sorted_estimates[int(len(estimates) * p) - 1]  for p in self.procentage]
            all_borders.append(borders)

        # Learn on whole train data
        estimator_classifier = self.estimator(instances)

        return DescriptiveAnalysisClassifier(estimator_classifier, all_borders, self.desc)
220
class DescriptiveAnalysisClassifier:
    """Attaches a textual label to every reliability estimate it returns."""

    def __init__(self, estimator_classifier, all_borders, desc):
        self.estimator_classifier = estimator_classifier
        self.all_borders = all_borders
        self.desc = desc

    def __call__(self, instance, result_type=Orange.core.GetValue):
        """Predict *instance* and label each estimate via its quantile borders."""
        predicted, probabilities = self.estimator_classifier(instance, Orange.core.GetBoth)

        for borders, estimate in zip(self.all_borders, probabilities.reliability_estimate):
            # Walk the borders from lowest up; the last border the estimate
            # reaches determines its label.
            estimate.text_description = self.desc[0]
            for lower_border, text_desc in zip(borders, self.desc):
                if estimate.estimate >= lower_border:
                    estimate.text_description = text_desc

        # Return the appropriate type of result
        if result_type == Orange.core.GetValue:
            return predicted
        if result_type == Orange.core.GetProbabilities:
            return probabilities
        return predicted, probabilities
243
class SensitivityAnalysis:
    """
   
    :param e: List of possible :math:`\epsilon` values for SAvar and SAbias
        reliability estimates.
    :type e: list of floats
   
    :rtype: :class:`Orange.evaluation.reliability.SensitivityAnalysisClassifier`
   
    To estimate the reliability of prediction for given instance,
    the learning set is extended with this instance, labeled with
    :math:`K + \epsilon (l_{max} - l_{min})`,
    where :math:`K` denotes the initial prediction,
    :math:`\epsilon` is sensitivity parameter and :math:`l_{min}` and
    :math:`l_{max}` denote lower and the upper bound of the learning
    instances' labels. After computing different sensitivity predictions
    using different values of :math:`\epsilon`, the prediction are combined
    into SAvar and SAbias. SAbias can be used in a signed or absolute form.

    :math:`SAvar = \\frac{\sum_{\epsilon \in E}(K_{\epsilon} - K_{-\epsilon})}{|E|}`

    :math:`SAbias = \\frac{\sum_{\epsilon \in E} (K_{\epsilon} - K ) + (K_{-\epsilon} - K)}{2 |E|}`
   
   
    """
    def __init__(self, e=None):
        # Per-instance default: the original used a mutable default list,
        # shared by every SensitivityAnalysis constructed without arguments.
        self.e = e if e is not None else [0.01, 0.1, 0.5, 1.0, 2.0]

    def __call__(self, instances, learner):
        """Bind a classifier to the label range of *instances*."""
        # l_min and l_max over the learning data's class values.
        values = [ex.getclass().value for ex in instances]
        return SensitivityAnalysisClassifier(self.e, instances, min(values), max(values), learner)
280
class SensitivityAnalysisClassifier:
    """Computes SAvar and SAbias estimates by retraining on perturbed labels."""

    def __init__(self, e, instances, min_value, max_value, learner):
        self.e = e
        self.instances = instances
        self.max_value = max_value
        self.min_value = min_value
        self.learner = learner

    def __call__(self, instance, predicted, probabilities):
        """Return [SAvar, SAbias (signed), SAbias (absolute)] for *instance*."""
        # Copy the training data and append the instance to be relabeled.
        extended = Orange.data.Table(self.instances)
        extended.append(Orange.data.Instance(instance))

        SAvar = SAbias = 0

        for eps in self.e:
            # Relabel with the prediction shifted up by eps, retrain, re-predict.
            extended[-1].setclass(predicted.value + eps * (self.max_value - self.min_value))
            k_plus = self.learner(extended)(instance, Orange.core.GetValue)

            # The same with the prediction shifted down.
            extended[-1].setclass(predicted.value - eps * (self.max_value - self.min_value))
            k_minus = self.learner(extended)(instance, Orange.core.GetValue)

            # Accumulate this epsilon's contribution to SAvar and SAbias.
            SAvar += k_plus.value - k_minus.value
            SAbias += k_plus.value + k_minus.value - 2 * predicted.value

        SAvar /= len(self.e)
        SAbias /= 2 * len(self.e)

        return [Estimate(SAvar, ABSOLUTE, SAVAR_ABSOLUTE),
                Estimate(SAbias, SIGNED, SABIAS_SIGNED),
                Estimate(abs(SAbias), ABSOLUTE, SABIAS_ABSOLUTE)]
326
class BaggingVariance:
    """
   
    :param m: Number of bagging models to be used with BAGV estimate
    :type m: int
   
    :rtype: :class:`Orange.evaluation.reliability.BaggingVarianceClassifier`
   
    Builds :math:`m` models on bootstrap samples of the training data; the
    variance of their predictions for an instance serves as its reliability
    estimate:

    :math:`BAGV = \\frac{1}{m} \sum_{i=1}^{m} (K_i - K)^2`

    where :math:`K = \\frac{\sum_{i=1}^{m} K_i}{m}` and :math:`K_i` are
    predictions of individual constructed models.
   
    """
    def __init__(self, m=50):
        self.m = m

    def __call__(self, instances, learner):
        """Train m bootstrap models and wrap them in a classifier."""
        def _bootstrap_model():
            # Sample with replacement, then train on the sample.
            indices = select_with_repeat(len(instances))
            return learner(instances.select(indices))

        return BaggingVarianceClassifier([_bootstrap_model() for _ in xrange(self.m)])
357
class BaggingVarianceClassifier:
    """Computes the BAGV estimate as the variance of bagged predictions."""

    def __init__(self, classifiers):
        self.classifiers = classifiers

    def __call__(self, instance, *args):
        """Return the bagging-variance estimate for *instance*."""
        predictions = []
        for c in self.classifiers:
            if c is not None:
                predictions.append(c(instance, Orange.core.GetValue).value)

        mean = sum(predictions) / len(predictions)
        variance = sum((p - mean) ** 2 for p in predictions) / len(predictions)

        return [Estimate(variance, ABSOLUTE, BAGV_ABSOLUTE)]
373
class LocalCrossValidation:
    """
   
    :param k: Number of nearest neighbours used in LCV estimate
    :type k: int
   
    :rtype: :class:`Orange.evaluation.reliability.LocalCrossValidationClassifier`
   
    :math:`k` nearest neighbours to the given instance are found and put in
    a separate data set. On this data set, a leave-one-out validation is
    performed. Reliability estimate is then the distance weighted absolute
    prediction error.

    If a special value 0 is passed as :math:`k` (as is by default),
    it is set as 1/20 of data set size (or 5, whichever is greater).
   
    1. Determine the set of k nearest neighours :math:`N = { (x_1, c_1),...,
       (x_k, c_k)}`.
    2. On this set, compute leave-one-out predictions :math:`K_i` and
       prediction errors :math:`E_i = | C_i - K_i |`.
    3. :math:`LCV(x) = \\frac{ \sum_{(x_i, c_i) \in N} d(x_i, x) * E_i }{ \sum_{(x_i, c_i) \in N} d(x_i, x) }`
   
    """
    def __init__(self, k=0):
        self.k = k

    def __call__(self, instances, learner):
        """Prepare a nearest-neighbour finder and return the LCV classifier."""
        nearest_neighbours_constructor = Orange.classification.knn.FindNearestConstructor()
        nearest_neighbours_constructor.distanceConstructor = Orange.distance.Euclidean()

        distance_id = Orange.feature.Descriptor.new_meta_id()
        nearest_neighbours = nearest_neighbours_constructor(instances, 0, distance_id)

        # Resolve the special value 0 locally instead of overwriting self.k:
        # the original mutated self.k here, so a learner reused on a second,
        # differently-sized data set silently kept the first set's k.
        k = self.k if self.k != 0 else max(5, len(instances) / 20)

        return LocalCrossValidationClassifier(distance_id, nearest_neighbours, k, learner)
411
class LocalCrossValidationClassifier:
    """Distance-weighted leave-one-out error over the k nearest neighbours."""

    def __init__(self, distance_id, nearest_neighbours, k, learner):
        self.distance_id = distance_id
        self.nearest_neighbours = nearest_neighbours
        self.k = k
        self.learner = learner

    def __call__(self, instance, *args):
        """Return the LCV estimate for *instance*."""
        weighted_error_sum = 0
        weight_sum = 0

        # The k nearest neighbours of the query instance.
        knn = list(self.nearest_neighbours(instance, self.k))

        # Leave-one-out over the neighbourhood: train on all but one
        # neighbour, predict the held-out one, accumulate its weighted error.
        for i, neighbour in enumerate(knn):
            rest = knn[:i] + knn[i + 1:]
            model = self.learner(Orange.data.Table(rest))
            prediction = model(neighbour, Orange.core.GetValue)
            error = abs(neighbour.getclass().value - prediction.value)

            # Closer neighbours get exponentially larger weight.
            weight = math.exp(-neighbour[self.distance_id])
            weighted_error_sum += error * weight
            weight_sum += weight

        LCV = weighted_error_sum / weight_sum if weight_sum != 0 else 0
        if math.isnan(LCV):
            LCV = 0.0
        return [ Estimate(LCV, ABSOLUTE, LCV_ABSOLUTE) ]
445
class CNeighbours:
    """
   
    :param k: Number of nearest neighbours used in CNK estimate
    :type k: int
   
    :rtype: :class:`Orange.evaluation.reliability.CNeighboursClassifier`
   
    CNK is the difference between the average label of an instance's nearest
    neighbours and its own prediction; it can be used in signed or absolute
    form.
   
    :math:`CNK = \\frac{\sum_{i=1}^{k}C_i}{k} - K`
   
    where :math:`k` denotes number of neighbors, C :sub:`i` denotes neighbours'
    labels and :math:`K` denotes the instance's prediction.
   
    """
    def __init__(self, k=5):
        self.k = k

    def __call__(self, instances, learner):
        """Prepare a Euclidean nearest-neighbour finder over *instances*."""
        finder_constructor = Orange.classification.knn.FindNearestConstructor()
        finder_constructor.distanceConstructor = Orange.distance.Euclidean()

        meta_id = Orange.feature.Descriptor.new_meta_id()
        finder = finder_constructor(instances, 0, meta_id)
        return CNeighboursClassifier(finder, self.k)
474
class CNeighboursClassifier:
    """Computes CNK: mean neighbour label minus the instance's prediction."""

    def __init__(self, nearest_neighbours, k):
        self.nearest_neighbours = nearest_neighbours
        self.k = k

    def __call__(self, instance, predicted, probabilities):
        """Return the signed and absolute CNK estimates for *instance*."""
        neighbours = list(self.nearest_neighbours(instance, self.k))

        # Mean label of the k nearest neighbours, minus the prediction.
        label_sum = sum(ex.getclass().value for ex in neighbours)
        CNK = label_sum / self.k - predicted.value

        return [Estimate(CNK, SIGNED, CNK_SIGNED),
                Estimate(abs(CNK), ABSOLUTE, CNK_ABSOLUTE)]
496
class Mahalanobis:
    """
   
    :param k: Number of nearest neighbours used in Mahalanobis estimate.
    :type k: int
   
    :rtype: :class:`Orange.evaluation.reliability.MahalanobisClassifier`
   
    The reliability estimate is the summed
    `mahalanobis distance <http://en.wikipedia.org/wiki/Mahalanobis_distance>`_
    from the evaluated instance to its :math:`k` nearest neighbours.

   
    """
    def __init__(self, k=3):
        self.k = k

    def __call__(self, instances, *args):
        """Prepare a Mahalanobis nearest-neighbour finder over *instances*."""
        finder_constructor = Orange.classification.knn.FindNearestConstructor()
        finder_constructor.distanceConstructor = Orange.distance.Mahalanobis()

        meta_id = Orange.feature.Descriptor.new_meta_id()
        finder = finder_constructor(instances, 0, meta_id)
        return MahalanobisClassifier(self.k, finder, meta_id)
521
class MahalanobisClassifier:
    """Sums Mahalanobis distances to the k nearest neighbours of an instance."""

    def __init__(self, k, nnm, mid):
        self.k = k
        self.nnm = nnm
        self.mid = mid

    def __call__(self, instance, *args):
        """Return the Mahalanobis-distance reliability estimate."""
        total_distance = sum(neighbour[self.mid].value
                             for neighbour in self.nnm(instance, self.k))
        return [ Estimate(total_distance, ABSOLUTE, MAHAL_ABSOLUTE) ]
534
class MahalanobisToCenter:
    """
    :rtype: :class:`Orange.evaluation.reliability.MahalanobisToCenterClassifier`
   
    The reliability estimate is the
    `mahalanobis distance <http://en.wikipedia.org/wiki/Mahalanobis_distance>`_
    between the predicted instance and the centroid of the training data.

   
    """
    def __init__(self):
        pass

    def __call__(self, instances, *args):
        """Continuize the data, locate its centroid and build the classifier."""
        continuizer = Orange.core.DomainContinuizer()
        continuizer.classTreatment = Orange.core.DomainContinuizer.Ignore
        continuizer.continuousTreatment = Orange.core.DomainContinuizer.NormalizeBySpan
        continuizer.multinomialTreatment = Orange.core.DomainContinuizer.NValues

        cont_domain = continuizer(instances)
        cont_instances = instances.translate(cont_domain)

        # Centroid of the continuized feature matrix.
        X, _, _ = cont_instances.to_numpy()
        centroid = numpy.average(X, 0)

        distance = Orange.distance.Mahalanobis()(cont_instances)

        # Centroid as an instance with an unknown class value.
        center = Orange.data.Instance(cont_instances.domain, list(centroid) + ["?"])

        return MahalanobisToCenterClassifier(distance, center, cont_domain)
566
class MahalanobisToCenterClassifier:
    """Measures the Mahalanobis distance from an instance to the data centroid."""

    def __init__(self, distance, average_instance, new_domain):
        self.distance = distance
        self.average_instance = average_instance
        self.new_domain = new_domain

    def __call__(self, instance, *args):
        """Return the distance-to-centroid reliability estimate."""
        # Translate the instance into the continuized domain before measuring.
        translated = Orange.data.Instance(self.new_domain, instance)
        dist = self.distance(translated, self.average_instance)
        return [ Estimate(dist, ABSOLUTE, MAHAL_TO_CENTER_ABSOLUTE) ]
580
581
class BaggingVarianceCNeighbours:
    """
   
    :param bagv: Instance of Bagging Variance estimator.
    :type bagv: :class:`BaggingVariance`
   
    :param cnk: Instance of CNK estimator.
    :type cnk: :class:`CNeighbours`
   
    :rtype: :class:`Orange.evaluation.reliability.BaggingVarianceCNeighboursClassifier`
   
    BVCK is a combination (average) of Bagging variance and local modeling of
    prediction error.
   
    """
    def __init__(self, bagv=None, cnk=None):
        # Create fresh estimators per instance: the original used mutable
        # default arguments (bagv=BaggingVariance(), cnk=CNeighbours()),
        # evaluated once at class definition and shared by every
        # BaggingVarianceCNeighbours constructed with defaults.
        self.bagv = bagv if bagv is not None else BaggingVariance()
        self.cnk = cnk if cnk is not None else CNeighbours()

    def __call__(self, instances, learner):
        """Train both sub-estimators and combine them in one classifier."""
        bagv_classifier = self.bagv(instances, learner)
        cnk_classifier = self.cnk(instances, learner)
        return BaggingVarianceCNeighboursClassifier(bagv_classifier, cnk_classifier)
605
class BaggingVarianceCNeighboursClassifier:
    """Averages the BAGV estimate with the absolute CNK estimate (BVCK)."""

    def __init__(self, bagv_classifier, cnk_classifier):
        self.bagv_classifier = bagv_classifier
        self.cnk_classifier = cnk_classifier

    def __call__(self, instance, predicted, probabilities):
        """Return [BVCK, BAGV, CNK signed, CNK absolute] estimates."""
        bagv_estimates = self.bagv_classifier(instance, predicted, probabilities)
        cnk_estimates = self.cnk_classifier(instance, predicted, probabilities)

        # BVCK = mean of BAGV and the absolute CNK (index 1 of cnk_estimates).
        combined = (bagv_estimates[0].estimate + cnk_estimates[1].estimate) / 2
        return [ Estimate(combined, ABSOLUTE, BVCK_ABSOLUTE) ] + bagv_estimates + cnk_estimates
620
class ErrorPredicting:
    """Learns a random forest that predicts the wrapped learner's CV error."""

    def __init__(self):
        pass

    def __call__(self, instances, learner):
        """Cross-validate *learner*, then model its errors with a forest."""
        cv_res = Orange.evaluation.testing.cross_validation([learner], instances)
        errors = get_prediction_error_list(cv_res)

        # Rebuild the data with a continuous "pe" (prediction error) class.
        pe_domain = Orange.data.Domain(instances.domain.attributes, Orange.core.FloatVariable("pe"))
        pe_data = Orange.data.Table(pe_domain, instances)
        for inst, err in izip(pe_data, errors):
            inst.set_class(err)

        forest_classifier = Orange.ensemble.forest.RandomForestLearner()(pe_data)

        return ErrorPredictingClassification(forest_classifier, pe_domain)
639
class ErrorPredictingClassification:
    """Uses a trained error-model to produce a signed reliability estimate."""

    def __init__(self, rf_classifier, new_domain):
        self.rf_classifier = rf_classifier
        self.new_domain = new_domain

    def __call__(self, instance, predicted, probabilities):
        """Return the forest's predicted error for *instance* as an estimate."""
        translated = Orange.data.Instance(self.new_domain, instance)
        predicted_error = self.rf_classifier(translated, Orange.core.GetValue)
        return [Estimate(predicted_error.value, SIGNED, SABIAS_SIGNED)]
650
class Learner:
    """
    Reliability estimation wrapper around a learner we want to test.
    Different reliability estimation algorithms can be used on the
    chosen learner. This learner works as any other and can be used as one,
    but it returns the classifier, wrapped into an instance of
    :class:`Orange.evaluation.reliability.Classifier`.
   
    :param box_learner: Learner we want to wrap into a reliability estimation
        classifier.
    :type box_learner: :obj:`~Orange.classification.Learner`
   
    :param estimators: List of different reliability estimation methods we
                       want to use on the chosen learner.
    :type estimators: :obj:`list` of reliability estimators
   
    :param name: Name of this reliability learner
    :type name: string
   
    :rtype: :class:`Orange.evaluation.reliability.Learner`
    """
    def __init__(self, box_learner, name="Reliability estimation",
                 estimators=None, **kwds):
        self.__dict__.update(kwds)
        self.name = name
        # Build the default estimator list per instance: the original used a
        # mutable default argument, so every Learner constructed with
        # defaults shared one list and one set of estimator objects.
        if estimators is None:
            estimators = [SensitivityAnalysis(),
                          LocalCrossValidation(),
                          BaggingVarianceCNeighbours(),
                          Mahalanobis(),
                          MahalanobisToCenter()]
        self.estimators = estimators
        self.box_learner = box_learner
        self.blending = False


    def __call__(self, instances, weight=None, **kwds):
        """Learn from the given table of data instances.
       
        :param instances: Data instances to learn from.
        :type instances: Orange.data.Table
        :param weight: Id of meta attribute with weights of instances
        :type weight: int
        :rtype: :class:`Orange.evaluation.reliability.Classifier`
        """

        blending_classifier = None
        new_domain = None

#        if instances.domain.class_var.var_type != Orange.feature.Continuous.Continuous:
#            raise Exception("This method only works on data with continuous class.")

        return Classifier(instances, self.box_learner, self.estimators, self.blending, new_domain, blending_classifier)

    def internal_cross_validation(self, instances, folds=10):
        """ Perform the internal cross validation for getting the best
        reliability estimate. It uses the reliability estimators defined in
        estimators attribute.

        Returns the id of the method that scored the best.

        :param instances: Data instances to use for ICV.
        :type instances: :class:`Orange.data.Table`
        :param folds: number of folds for ICV.
        :type folds: int
        :rtype: int

        """
        res = Orange.evaluation.testing.cross_validation([self], instances, folds=folds)
        results = get_pearson_r(res)
        # Tuples sort by r first, so the last entry has the highest
        # correlation; its method id sits at index 3.
        sorted_results = sorted(results)
        return sorted_results[-1][3]

    def internal_cross_validation_testing(self, instances, folds=10):
        """ Perform internal cross validation (as in Automatic selection of
        reliability estimates for individual regression predictions,
        Zoran Bosnic, 2010) and return id of the method
        that scored best on this data.

        :param instances: Data instances to use for ICV.
        :type instances: :class:`Orange.data.Table`
        :param folds: number of folds for ICV.
        :type folds: int
        :rtype: int

        """
        cv_indices = Orange.core.MakeRandomIndicesCV(instances, folds)

        sum_of_rs = defaultdict(float)

        # Evaluate each estimator on every fold and accumulate its r scores.
        for fold in xrange(folds):
            data = instances.select(cv_indices, fold)
            if len(data) < 10:
                res = Orange.evaluation.testing.leave_one_out([self], data)
            else:
                res = Orange.evaluation.testing.cross_validation([self], data)
            results = get_pearson_r(res)
            for r, _, _, method in results:
                sum_of_rs[method] += r
        # The method with the highest summed correlation wins.
        sorted_sum_of_rs = sorted(sum_of_rs.items(), key=lambda estimate: estimate[1], reverse=True)
        return sorted_sum_of_rs[0][0]

    labels = ["SAvar", "SAbias", "BAGV", "CNK", "LCV", "BVCK", "Mahalanobis", "ICV"]
755
class Classifier:
    """
    A reliability estimation wrapper for classifiers.

    What distinguishes this classifier is that the returned probabilities (if
    :obj:`Orange.classification.Classifier.GetProbabilities` or
    :obj:`Orange.classification.Classifier.GetBoth` is passed) contain an
    additional attribute :obj:`reliability_estimate`, which is an instance of
    :class:`~Orange.evaluation.reliability.Estimate`.

    """

    def __init__(self, instances, box_learner, estimators, blending, blending_domain, rf_classifier, **kwds):
        self.__dict__.update(kwds)
        self.instances = instances
        self.box_learner = box_learner
        self.estimators = estimators
        self.blending = blending
        self.blending_domain = blending_domain
        self.rf_classifier = rf_classifier

        # The underlying predictive model, trained on the original data.
        self.classifier = box_learner(instances)

        # One trained estimation classifier per configured estimator.
        self.estimation_classifiers = [estimator(instances, box_learner)
                                       for estimator in estimators]

    def __call__(self, instance, result_type=Orange.core.GetValue):
        """
        Classify and estimate reliability of estimation for a new instance.
        When :obj:`result_type` is set to
        :obj:`Orange.classification.Classifier.GetBoth` or
        :obj:`Orange.classification.Classifier.GetProbabilities`,
        an additional attribute :obj:`reliability_estimate`,
        which is an instance of
        :class:`~Orange.evaluation.reliability.Estimate`,
        is added to the distribution object.
       
        :param instance: instance to be classified.
        :type instance: :class:`Orange.data.Instance`
        :param result_type: :class:`Orange.classification.Classifier.GetValue` or \
              :class:`Orange.classification.Classifier.GetProbabilities` or
              :class:`Orange.classification.Classifier.GetBoth`
       
        :rtype: :class:`Orange.data.Value`,
              :class:`Orange.statistics.Distribution` or a tuple with both
        """
        predicted, probabilities = self.classifier(instance, Orange.core.GetBoth)

        # Regression models may return no distribution; create a placeholder
        # so there is an object to attach the estimates to.
        if probabilities is None:
            probabilities = Orange.statistics.distribution.Continuous()
        probabilities.setattr('reliability_estimate', [])

        # Collect the estimates from every estimation classifier.
        for estimation_classifier in self.estimation_classifiers:
            probabilities.reliability_estimate.extend(
                estimation_classifier(instance, predicted, probabilities))

        # Return the appropriate type of result
        if result_type == Orange.core.GetValue:
            return predicted
        if result_type == Orange.core.GetProbabilities:
            return probabilities
        return predicted, probabilities
Note: See TracBrowser for help on using the repository browser.