source: orange/Orange/evaluation/reliability.py @ 9725:6c16952df555

Revision 9725:6c16952df555, 30.8 KB checked in by anze <anze.staric@…>, 2 years ago (diff)

Updated imports of c extensions.

Line 
1import Orange
2
3import random
4from Orange import statc
5import math
6import warnings
7import numpy
8
9from collections import defaultdict
10from itertools import izip
11
# Labels and final variables
labels = ["SAvar", "SAbias", "BAGV", "CNK", "LCV", "BVCK", "Mahalanobis", "ICV"]

"""
# All the estimators calculation constants
DO_SA = 0
DO_BAGV = 1
DO_CNK = 2
DO_LCV = 3
DO_BVCK = 4
DO_MAHAL = 5
"""

# All the estimator method constants
SAVAR_ABSOLUTE = 0
SABIAS_SIGNED = 1
SABIAS_ABSOLUTE = 2
BAGV_ABSOLUTE = 3
CNK_SIGNED = 4
CNK_ABSOLUTE = 5
LCV_ABSOLUTE = 6
BVCK_ABSOLUTE = 7
MAHAL_ABSOLUTE = 8
BLENDING_ABSOLUTE = 9
ICV_METHOD = 10
MAHAL_TO_CENTER_ABSOLUTE = 13

# Type of estimator constant
SIGNED = 0
ABSOLUTE = 1

# Human-readable names of the estimator methods, keyed by the method
# constants above.  (Keys 11 and 12 have no constant in this module.)
# Fixed: entry 7 read "BVCK_absolute", inconsistent with every other name.
METHOD_NAME = {0: "SAvar absolute", 1: "SAbias signed", 2: "SAbias absolute",
               3: "BAGV absolute", 4: "CNK signed", 5: "CNK absolute",
               6: "LCV absolute", 7: "BVCK absolute", 8: "Mahalanobis absolute",
               9: "BLENDING absolute", 10: "ICV", 11: "RF Variance", 12: "RF Std",
               13: "Mahalanobis to center"}

# Shared bootstrap sampler used by BaggingVariance.
select_with_repeat = Orange.core.MakeRandomIndicesMultiple()
select_with_repeat.random_generator = Orange.misc.Random()
52
def get_reliability_estimation_list(res, i):
    """Collect the i-th reliability estimate of every tested instance.

    Returns ``(estimates, signed_or_absolute, method)``: a list with one
    estimate value per tested instance, plus the signedness flag and method
    id taken from the first instance's i-th estimate.
    """
    estimates = [tested.probabilities[0].reliability_estimate[i].estimate
                 for tested in res.results]
    first = res.results[0].probabilities[0].reliability_estimate[i]
    return estimates, first.signed_or_absolute, first.method
55
def get_prediction_error_list(res):
    """Return the signed prediction error (actual class minus the first
    learner's prediction) for every tested instance in *res*."""
    errors = []
    for tested in res.results:
        errors.append(tested.actualClass - tested.classes[0])
    return errors
58
def get_description_list(res, i):
    """Return the textual description of the i-th reliability estimate for
    every tested instance in *res*."""
    descriptions = []
    for tested in res.results:
        descriptions.append(tested.probabilities[0].reliability_estimate[i].text_description)
    return descriptions
61
def get_pearson_r(res):
    """
    :param res: results of evaluation, done using learners,
        wrapped into :class:`Orange.evaluation.reliability.Classifier`.
    :type res: :class:`Orange.evaluation.testing.ExperimentResults`

    Return Pearson's coefficient between the prediction error and each of the
    used reliability estimates, together with each coefficient's p-value,
    as a list of ``(r, p, signed_or_absolute, method)`` tuples.
    """
    errors = get_prediction_error_list(res)
    # Absolute errors are reused for every absolute estimator.
    abs_errors = [abs(e) for e in errors]
    n_estimates = len(res.results[0].probabilities[0].reliability_estimate)
    scores = []
    for i in xrange(n_estimates):
        estimates, signed_or_absolute, method = get_reliability_estimation_list(res, i)
        # Signed estimates are correlated with signed errors, absolute
        # estimates with absolute errors.
        observed = errors if signed_or_absolute == SIGNED else abs_errors
        try:
            r, p = statc.pearsonr(observed, estimates)
        except Exception:
            r = p = float("NaN")
        scores.append((r, p, signed_or_absolute, method))
    return scores
85
def get_spearman_r(res):
    """
    :param res: results of evaluation, done using learners,
        wrapped into :class:`Orange.evaluation.reliability.Classifier`.
    :type res: :class:`Orange.evaluation.testing.ExperimentResults`

    Return Spearman's coefficient between the prediction error and each of the
    used reliability estimates, together with each coefficient's p-value,
    as a list of ``(r, p, signed_or_absolute, method)`` tuples.
    """
    errors = get_prediction_error_list(res)
    # Absolute errors are reused for every absolute estimator.
    abs_errors = [abs(e) for e in errors]
    n_estimates = len(res.results[0].probabilities[0].reliability_estimate)
    scores = []
    for i in xrange(n_estimates):
        estimates, signed_or_absolute, method = get_reliability_estimation_list(res, i)
        # Signed estimates are correlated with signed errors, absolute
        # estimates with absolute errors.
        observed = errors if signed_or_absolute == SIGNED else abs_errors
        try:
            r, p = statc.spearmanr(observed, estimates)
        except Exception:
            r = p = float("NaN")
        scores.append((r, p, signed_or_absolute, method))
    return scores
109
def get_pearson_r_by_iterations(res):
    """
    :param res: results of evaluation, done using learners,
        wrapped into :class:`Orange.evaluation.reliability.Classifier`.
    :type res: :class:`Orange.evaluation.testing.ExperimentResults`

    Return average Pearson's coefficient over all folds between prediction error
    and each of the used estimates, as a list of
    ``(r, p, signed_or_absolute, method)`` tuples.
    """
    results_by_fold = Orange.evaluation.scoring.split_by_iterations(res)
    number_of_estimates = len(res.results[0].probabilities[0].reliability_estimate)
    number_of_instances = len(res.results)
    number_of_folds = len(results_by_fold)
    # Per-estimate accumulators: summed r over folds, signedness, method id.
    results = [0 for _ in xrange(number_of_estimates)]
    sig = [0 for _ in xrange(number_of_estimates)]
    method_list = [0 for _ in xrange(number_of_estimates)]
    
    # NOTE(review): the loop variable deliberately shadows the `res`
    # parameter; all whole-experiment quantities were computed above.
    for res in results_by_fold:
        prediction_error = get_prediction_error_list(res)
        for i in xrange(number_of_estimates):
            reliability_estimate, signed_or_absolute, method = get_reliability_estimation_list(res, i)
            try:
                # Signed estimates correlate with signed errors, absolute
                # estimates with absolute errors.
                if signed_or_absolute == SIGNED:
                    r, _ = statc.pearsonr(prediction_error, reliability_estimate)
                else:
                    r, _ = statc.pearsonr([abs(pe) for pe in prediction_error], reliability_estimate)
            except Exception:
                r = float("NaN")
            results[i] += r
            sig[i] = signed_or_absolute
            method_list[i] = method
    
    # Average r over folds, then derive p-values from the averaged r and
    # the total number of instances.
    results = [float(res) / number_of_folds for res in results]
    ps = [p_value_from_r(r, number_of_instances) for r in results]
    
    return zip(results, ps, sig, method_list)
147
def p_value_from_r(r, n):
    """
    Calculate the p-value from the Pearson correlation coefficient ``r``
    and the sample size ``n``, via the t statistic and the incomplete
    beta function.
    """
    df = n - 2
    # t statistic of the correlation; the 1e-30 terms guard against a
    # division by zero when r is exactly +1 or -1.
    t = r * (df /((-r + 1.0 + 1e-30) * (r + 1.0 + 1e-30)) )**0.5
    return statc.betai (df * 0.5, 0.5, df/(df + t*t))
155
class Estimate:
    """
    Reliability estimate. Contains attributes that describe the results of
    reliability estimation.

    .. attribute:: estimate

        A numerical reliability estimate.

    .. attribute:: signed_or_absolute

        Determines whether the method used gives a signed or absolute result.
        Has a value of either :obj:`SIGNED` or :obj:`ABSOLUTE`.

    .. attribute:: method

        An integer ID of reliability estimation method used.

    .. attribute:: method_name

        Name (string) of reliability estimation method used.

    .. attribute:: icv_method

        An integer ID of reliability estimation method that performed best,
        as determined by ICV, and of which estimate is stored in the
        :obj:`estimate` field. (-1 when ICV was not used.)

    .. attribute:: icv_method_name

        Name (string) of reliability estimation method that performed best,
        as determined by ICV. (An empty string when ICV was not used.)

    """
    def __init__(self, estimate, signed_or_absolute, method, icv_method = -1):
        self.estimate = estimate
        self.signed_or_absolute = signed_or_absolute
        self.method = method
        # Human-readable name looked up from the module-level table.
        self.method_name = METHOD_NAME[method]
        # -1 is the "ICV not used" sentinel.
        self.icv_method = icv_method
        self.icv_method_name = METHOD_NAME[icv_method] if icv_method != -1 else ""
        # Filled in later by DescriptiveAnalysisClassifier, when used.
        self.text_description = None
198
class DescriptiveAnalysis:
    """Wrap a reliability estimation learner and attach textual descriptions
    (by default "high"/"medium"/"low") to its numerical estimates.

    :param estimator: reliability estimation learner to wrap.
    :param desc: textual labels, from the one applied to the smallest
        absolute estimates to the one applied to the largest.
    :param procentage: percentile borders (fractions in [0, 1)) at which
        each label from ``desc`` starts to apply.
    """
    def __init__(self, estimator, desc=None, procentage=None):
        # None sentinels replace the old mutable list defaults, which were
        # shared between every instance created without explicit arguments.
        self.desc = desc if desc is not None else ["high", "medium", "low"]
        self.procentage = procentage if procentage is not None else [0.00, 0.33, 0.66]
        self.estimator = estimator
    
    def __call__(self, instances, weight=None, **kwds):
        
        # Calculate borders using cross validation
        res = Orange.evaluation.testing.cross_validation([self.estimator], instances)
        all_borders = []
        for i in xrange(len(res.results[0].probabilities[0].reliability_estimate)):
            estimates, signed_or_absolute, method = get_reliability_estimation_list(res, i)
            sorted_estimates = sorted(abs(x) for x in estimates)
            # Absolute-estimate value at each requested percentile; these
            # borders split future estimates into the description bins.
            borders = [sorted_estimates[int(len(estimates) * p) - 1] for p in self.procentage]
            all_borders.append(borders)
        
        # Learn on whole train data
        estimator_classifier = self.estimator(instances)
        
        return DescriptiveAnalysisClassifier(estimator_classifier, all_borders, self.desc)
220
class DescriptiveAnalysisClassifier:
    # Labels every reliability estimate of a prediction by comparing it to
    # precomputed percentile borders (one border list per estimate).
    def __init__(self, estimator_classifier, all_borders, desc):
        self.estimator_classifier = estimator_classifier
        self.all_borders = all_borders
        self.desc = desc
    
    def __call__(self, instance, result_type=Orange.core.GetValue):
        """Predict *instance* and attach a text description to each estimate."""
        predicted, probabilities = self.estimator_classifier(instance, Orange.core.GetBoth)
        
        # Each estimate gets the label of the highest border it reaches;
        # the first label is the fallback.
        for estimate, borders in zip(probabilities.reliability_estimate, self.all_borders):
            estimate.text_description = self.desc[0]
            for border, label in zip(borders, self.desc):
                if estimate.estimate >= border:
                    estimate.text_description = label
        
        # Return the appropriate type of result
        if result_type == Orange.core.GetProbabilities:
            return probabilities
        if result_type == Orange.core.GetValue:
            return predicted
        return predicted, probabilities
243
class SensitivityAnalysis:
    """
    :param e: List of possible :math:`\epsilon` values for SAvar and SAbias
        reliability estimates.
    :type e: list of floats
    
    :rtype: :class:`Orange.evaluation.reliability.SensitivityAnalysisClassifier`
    
    To estimate the reliability of prediction for given instance,
    the learning set is extended with this instance, labeled with
    :math:`K + \epsilon (l_{max} - l_{min})`,
    where :math:`K` denotes the initial prediction,
    :math:`\epsilon` is sensitivity parameter and :math:`l_{min}` and
    :math:`l_{max}` denote lower and the upper bound of the learning
    instances' labels. After computing different sensitivity predictions
    using different values of :math:`\epsilon`, the prediction are combined
    into SAvar and SAbias. SAbias can be used in a signed or absolute form.

    :math:`SAvar = \\frac{\sum_{\epsilon \in E}(K_{\epsilon} - K_{-\epsilon})}{|E|}`

    :math:`SAbias = \\frac{\sum_{\epsilon \in E} (K_{\epsilon} - K ) + (K_{-\epsilon} - K)}{2 |E|}`
    
    """
    def __init__(self, e=None):
        # A None sentinel replaces the old mutable list default, which was
        # shared between every instance created without an explicit `e`.
        self.e = e if e is not None else [0.01, 0.1, 0.5, 1.0, 2.0]
    
    def __call__(self, instances, learner):
        """Bind the estimator to the data, recording the range of class
        values used to scale the epsilon perturbations."""
        class_values = [ex.getclass().value for ex in instances]
        return SensitivityAnalysisClassifier(self.e, instances,
                                             min(class_values), max(class_values),
                                             learner)
280   
class SensitivityAnalysisClassifier:
    # Computes the SAvar and SAbias estimates by retraining the learner on
    # the data extended with the query instance, relabeled with the
    # prediction shifted by +/- eps * (class-value range).
    def __init__(self, e, instances, min_value, max_value, learner):
        # e: list of epsilon sensitivity parameters.
        # min_value/max_value: bounds of the training data's class values.
        self.e = e
        self.instances = instances
        self.max_value = max_value
        self.min_value = min_value
        self.learner = learner
    
    def __call__(self, instance, predicted, probabilities):
        """Return [SAvar (absolute), SAbias (signed), |SAbias| (absolute)]."""
        # Create new dataset
        r_data = Orange.data.Table(self.instances)
        
        # Create new instance
        modified_instance = Orange.data.Instance(instance)
        
        # Append it to the data
        r_data.append(modified_instance)
        
        # Calculate SAvar & SAbias
        SAvar = SAbias = 0
        
        for eps in self.e:
            # +epsilon: relabel the appended copy above the prediction and
            # retrain on the extended data.
            r_data[-1].setclass(predicted.value + eps*(self.max_value - self.min_value))
            c = self.learner(r_data)
            k_plus = c(instance, Orange.core.GetValue)
            
            # -epsilon: relabel below the prediction and retrain.
            r_data[-1].setclass(predicted.value - eps*(self.max_value - self.min_value))
            c = self.learner(r_data)
            k_minus = c(instance, Orange.core.GetValue)
            
            # Accumulate this epsilon's contribution to SAvar and SAbias.
            SAvar += k_plus.value - k_minus.value
            SAbias += k_plus.value + k_minus.value - 2*predicted.value
        
        SAvar /= len(self.e)
        SAbias /= 2*len(self.e)
        
        return [Estimate(SAvar, ABSOLUTE, SAVAR_ABSOLUTE),
                Estimate(SAbias, SIGNED, SABIAS_SIGNED),
                Estimate(abs(SAbias), ABSOLUTE, SABIAS_ABSOLUTE)]
326   
class BaggingVariance:
    """
    :param m: Number of bagging models to be used with BAGV estimate
    :type m: int
    
    :rtype: :class:`Orange.evaluation.reliability.BaggingVarianceClassifier`
    
    Trains :math:`m` models on bootstrap resamples of the data; the variance
    of their predictions for a given instance is the reliability estimate:

    :math:`BAGV = \\frac{1}{m} \sum_{i=1}^{m} (K_i - K)^2`

    where :math:`K = \\frac{\sum_{i=1}^{m} K_i}{m}` is the mean of the
    individual models' predictions :math:`K_i`.
    """
    def __init__(self, m=50):
        self.m = m
    
    def __call__(self, instances, learner):
        """Train ``m`` models on bootstrap samples and wrap them."""
        members = []
        for _ in xrange(self.m):
            # Sampling with replacement via the module-level sampler.
            bootstrap = instances.select(select_with_repeat(len(instances)))
            members.append(learner(bootstrap))
        return BaggingVarianceClassifier(members)
357
class BaggingVarianceClassifier:
    """Computes BAGV: the variance of the bagged models' predictions for a
    given instance."""
    def __init__(self, classifiers):
        # classifiers: models trained on bootstrap samples; None entries are
        # skipped at prediction time.
        self.classifiers = classifiers
    
    def __call__(self, instance, *args):
        """Return ``[Estimate(BAGV, ABSOLUTE, BAGV_ABSOLUTE)]``."""
        # Predictions of all usable bagged models.
        bagged_values = [c(instance, Orange.core.GetValue).value
                         for c in self.classifiers if c is not None]
        
        if not bagged_values:
            # No usable model: report zero variance instead of raising
            # ZeroDivisionError below.
            return [Estimate(0.0, ABSOLUTE, BAGV_ABSOLUTE)]
        
        k = sum(bagged_values) / len(bagged_values)
        # Population variance of the bagged predictions around their mean.
        BAGV = sum((v - k) ** 2 for v in bagged_values) / len(bagged_values)
        
        return [Estimate(BAGV, ABSOLUTE, BAGV_ABSOLUTE)]
373       
class LocalCrossValidation:
    """
    :param k: Number of nearest neighbours used in LCV estimate
    :type k: int
    
    :rtype: :class:`Orange.evaluation.reliability.LocalCrossValidationClassifier`
    
    :math:`k` nearest neighbours to the given instance are found and put in
    a separate data set. On this data set, a leave-one-out validation is
    performed. Reliability estimate is then the distance weighted absolute
    prediction error.

    If a special value 0 is passed as :math:`k` (as is by default),
    it is set as 1/20 of data set size (or 5, whichever is greater).
    
    1. Determine the set of k nearest neighours :math:`N = { (x_1, c_1),...,
       (x_k, c_k)}`.
    2. On this set, compute leave-one-out predictions :math:`K_i` and
       prediction errors :math:`E_i = | C_i - K_i |`.
    3. :math:`LCV(x) = \\frac{ \sum_{(x_i, c_i) \in N} d(x_i, x) * E_i }{ \sum_{(x_i, c_i) \in N} d(x_i, x) }`
    """
    def __init__(self, k=0):
        # k == 0 means "derive the neighbourhood size from the data set".
        self.k = k
    
    def __call__(self, instances, learner):
        nearest_neighbours_constructor = Orange.classification.knn.FindNearestConstructor()
        nearest_neighbours_constructor.distanceConstructor = Orange.distance.Euclidean()
        
        distance_id = Orange.data.new_meta_id()
        nearest_neighbours = nearest_neighbours_constructor(instances, 0, distance_id)
        
        # Resolve the special value 0 into a *local* variable.  The old code
        # overwrote self.k, so every later call on a different data set
        # silently reused the neighbourhood size computed for the first one.
        # (Integer division is intentional under Python 2.)
        k = self.k
        if k == 0:
            k = max(5, len(instances) / 20)
        
        return LocalCrossValidationClassifier(distance_id, nearest_neighbours, k, learner)
411
class LocalCrossValidationClassifier:
    # Computes LCV: leave-one-out absolute prediction error over the query
    # instance's k nearest neighbours, weighted by exp(-distance).
    def __init__(self, distance_id, nearest_neighbours, k, learner):
        # distance_id: meta attribute id under which each neighbour's
        # distance to the query instance is stored.
        self.distance_id = distance_id
        self.nearest_neighbours = nearest_neighbours
        self.k = k
        self.learner = learner
    
    def __call__(self, instance, *args):
        """Return ``[Estimate(LCV, ABSOLUTE, LCV_ABSOLUTE)]``."""
        # Weighted error sum (numerator) and weight sum (denominator).
        LCVer = 0
        LCVdi = 0
        
        # Find k nearest neighbors
        
        knn = [ex for ex in self.nearest_neighbours(instance, self.k)]
        
        # Leave-one-out over the neighbourhood: train on all neighbours but
        # one, predict the held-out neighbour, record its absolute error.
        for i in xrange(len(knn)):
            train = knn[:]
            del train[i]
            
            classifier = self.learner(Orange.data.Table(train))
            
            returned_value = classifier(knn[i], Orange.core.GetValue)
            
            e = abs(knn[i].getclass().value - returned_value.value)
            
            # Weight the held-out error by exp(-distance to the query).
            LCVer += e * math.exp(-knn[i][self.distance_id])
            LCVdi += math.exp(-knn[i][self.distance_id])
        
        # Weighted mean error; guard against a zero weight sum and NaN.
        LCV = LCVer / LCVdi if LCVdi != 0 else 0
        if math.isnan(LCV):
            LCV = 0.0
        return [ Estimate(LCV, ABSOLUTE, LCV_ABSOLUTE) ]
445
class CNeighbours:
    """
    :param k: Number of nearest neighbours used in CNK estimate
    :type k: int
    
    :rtype: :class:`Orange.evaluation.reliability.CNeighboursClassifier`
    
    CNK is the difference between the average label of an instance's
    :math:`k` nearest neighbours and its prediction; it can be used in a
    signed or absolute form:
    
    :math:`CNK = \\frac{\sum_{i=1}^{k}C_i}{k} - K`
    
    where C :sub:`i` are the neighbours' labels and :math:`K` is the
    instance's prediction.
    """
    def __init__(self, k=5):
        self.k = k
    
    def __call__(self, instances, learner):
        """Build the Euclidean nearest-neighbour search structure.
        ``learner`` is accepted for interface compatibility but unused."""
        finder = Orange.classification.knn.FindNearestConstructor()
        finder.distanceConstructor = Orange.distance.Euclidean()
        
        meta_id = Orange.data.new_meta_id()
        neighbour_search = finder(instances, 0, meta_id)
        return CNeighboursClassifier(neighbour_search, self.k)
474
class CNeighboursClassifier:
    # Computes CNK: mean label of the k nearest neighbours minus the
    # prediction for the query instance.
    def __init__(self, nearest_neighbours, k):
        self.nearest_neighbours = nearest_neighbours
        self.k = k
    
    def __call__(self, instance, predicted, probabilities):
        """Return CNK in signed and absolute form."""
        neighbours = self.nearest_neighbours(instance, self.k)
        
        # Average neighbour label (divided by k, as configured) minus the
        # prediction.
        CNK = sum(ex.getclass().value for ex in neighbours) / self.k
        CNK -= predicted.value
        
        return [Estimate(CNK, SIGNED, CNK_SIGNED),
                Estimate(abs(CNK), ABSOLUTE, CNK_ABSOLUTE)]
496   
class Mahalanobis:
    """
    :param k: Number of nearest neighbours used in Mahalanobis estimate.
    :type k: int
    
    :rtype: :class:`Orange.evaluation.reliability.MahalanobisClassifier`
    
    The estimate is the summed
    `mahalanobis distance <http://en.wikipedia.org/wiki/Mahalanobis_distance>`_
    from the evaluated instance to its :math:`k` nearest neighbours.
    """
    def __init__(self, k=3):
        self.k = k
    
    def __call__(self, instances, *args):
        """Build the Mahalanobis nearest-neighbour search structure."""
        finder = Orange.classification.knn.FindNearestConstructor()
        finder.distanceConstructor = Orange.distance.Mahalanobis()
        
        meta_id = Orange.data.new_meta_id()
        neighbour_search = finder(instances, 0, meta_id)
        return MahalanobisClassifier(self.k, neighbour_search, meta_id)
521
class MahalanobisClassifier:
    # Sums the Mahalanobis distances (stored in meta attribute `mid`) from
    # the query instance to its k nearest neighbours.
    def __init__(self, k, nnm, mid):
        self.k = k
        self.nnm = nnm
        self.mid = mid
    
    def __call__(self, instance, *args):
        """Return the Mahalanobis reliability estimate for *instance*."""
        neighbours = self.nnm(instance, self.k)
        total_distance = sum(ex[self.mid].value for ex in neighbours)
        return [Estimate(total_distance, ABSOLUTE, MAHAL_ABSOLUTE)]
534
class MahalanobisToCenter:
    """
    :rtype: :class:`Orange.evaluation.reliability.MahalanobisToCenterClassifier`
    
    Mahalanobis distance to center reliability estimate is defined as a
    `mahalanobis distance <http://en.wikipedia.org/wiki/Mahalanobis_distance>`_
    between the predicted instance and the centroid of the data.
    """
    def __init__(self):
        pass
    
    def __call__(self, instances, *args):
        # Continuize the data (ignoring the class attribute) so a centroid
        # and a Mahalanobis distance are well defined for all features.
        dc = Orange.core.DomainContinuizer()
        dc.classTreatment = Orange.core.DomainContinuizer.Ignore
        dc.continuousTreatment = Orange.core.DomainContinuizer.NormalizeBySpan
        dc.multinomialTreatment = Orange.core.DomainContinuizer.NValues
        
        new_domain = dc(instances)
        new_instances = instances.translate(new_domain)
        
        # Column-wise mean of the continuized feature matrix = the centroid.
        X, _, _ = new_instances.to_numpy()
        instance_avg = numpy.average(X, 0)
        
        distance_constructor = Orange.distance.Mahalanobis()
        distance = distance_constructor(new_instances)
        
        # Centroid as an instance; "?" marks the unknown class value.
        average_instance = Orange.data.Instance(new_instances.domain, list(instance_avg) + ["?"])
        
        return MahalanobisToCenterClassifier(distance, average_instance, new_domain)
566
class MahalanobisToCenterClassifier:
    # Measures the Mahalanobis distance from a (continuized) query instance
    # to the precomputed centroid of the training data.
    def __init__(self, distance, average_instance, new_domain):
        self.distance = distance
        self.average_instance = average_instance
        self.new_domain = new_domain
    
    def __call__(self, instance, *args):
        """Return the distance-to-centroid estimate for *instance*."""
        translated = Orange.data.Instance(self.new_domain, instance)
        dist = self.distance(translated, self.average_instance)
        return [Estimate(dist, ABSOLUTE, MAHAL_TO_CENTER_ABSOLUTE)]
580
581
class BaggingVarianceCNeighbours:
    """
    :param bagv: Instance of Bagging Variance estimator.
    :type bagv: :class:`Orange.evaluation.reliability.BaggingVariance`
    
    :param cnk: Instance of CNK estimator.
    :type cnk: :class:`Orange.evaluation.reliability.CNeighbours`
    
    :rtype: :class:`Orange.evaluation.reliability.BaggingVarianceCNeighboursClassifier`
    
    BVCK is a combination (average) of Bagging variance and local modeling of
    prediction error.
    """
    def __init__(self, bagv=None, cnk=None):
        # None sentinels replace the old default estimator *instances*,
        # which were created once at class-definition time and shared by
        # every wrapper built without explicit arguments.
        self.bagv = bagv if bagv is not None else BaggingVariance()
        self.cnk = cnk if cnk is not None else CNeighbours()
    
    def __call__(self, instances, learner):
        bagv_classifier = self.bagv(instances, learner)
        cnk_classifier = self.cnk(instances, learner)
        return BaggingVarianceCNeighboursClassifier(bagv_classifier, cnk_classifier)
605
class BaggingVarianceCNeighboursClassifier:
    # Combines the BAGV and CNK estimates of a prediction into BVCK (their
    # average) and also passes the underlying estimates through.
    def __init__(self, bagv_classifier, cnk_classifier):
        self.bagv_classifier = bagv_classifier
        self.cnk_classifier = cnk_classifier
    
    def __call__(self, instance, predicted, probabilities):
        """Return [BVCK] followed by the BAGV and CNK estimates."""
        bagv_estimates = self.bagv_classifier(instance, predicted, probabilities)
        cnk_estimates = self.cnk_classifier(instance, predicted, probabilities)
        
        # Average of BAGV (absolute) and CNK's absolute form (second entry).
        combined = (bagv_estimates[0].estimate + cnk_estimates[1].estimate) / 2
        return ([Estimate(combined, ABSOLUTE, BVCK_ABSOLUTE)]
                + bagv_estimates + cnk_estimates)
620
class ErrorPredicting:
    # Learns a random forest that predicts the signed prediction error of
    # the wrapped learner, using errors observed in cross validation as the
    # regression target.
    def __init__(self):
        pass
    
    def __call__(self, instances, learner):
        # Signed prediction errors from cross validation become the new
        # class values.
        res = Orange.evaluation.testing.cross_validation([learner], instances)
        prediction_errors = get_prediction_error_list(res)
        
        # Same attributes, but with the continuous "pe" (prediction error)
        # variable as the class.
        new_domain = Orange.data.Domain(instances.domain.attributes, Orange.core.FloatVariable("pe"))
        new_dataset = Orange.data.Table(new_domain, instances)
        
        for instance, prediction_error in izip(new_dataset, prediction_errors):
            instance.set_class(prediction_error)
        
        rf = Orange.ensemble.forest.RandomForestLearner()
        rf_classifier = rf(new_dataset)
        
        return ErrorPredictingClassification(rf_classifier, new_domain)
639       
class ErrorPredictingClassification:
    # Predicts the expected signed prediction error of an instance using a
    # random forest trained on observed cross-validation errors.
    def __init__(self, rf_classifier, new_domain):
        self.rf_classifier = rf_classifier
        self.new_domain = new_domain
    
    def __call__(self, instance, predicted, probabilities):
        """Return the forest's error prediction as a signed estimate."""
        translated = Orange.data.Instance(self.new_domain, instance)
        error = self.rf_classifier(translated, Orange.core.GetValue)
        return [Estimate(error.value, SIGNED, SABIAS_SIGNED)]
650
class Learner:
    """
    Reliability estimation wrapper around a learner we want to test.
    Different reliability estimation algorithms can be used on the
    chosen learner. This learner works as any other and can be used as one,
    but it returns the classifier, wrapped into an instance of
    :class:`Orange.evaluation.reliability.Classifier`.
    
    :param box_learner: Learner we want to wrap into a reliability estimation
        classifier.
    :type box_learner: learner
    
    :param estimators: List of different reliability estimation methods we
                       want to use on the chosen learner.
    :type estimators: list of reliability estimators
    
    :param name: Name of this reliability learner
    :type name: string
    
    :rtype: :class:`Orange.evaluation.reliability.Learner`
    """
    
    labels = ["SAvar", "SAbias", "BAGV", "CNK", "LCV", "BVCK", "Mahalanobis", "ICV"]
    
    def __init__(self, box_learner, name="Reliability estimation",
                 estimators=None, **kwds):
        self.__dict__.update(kwds)
        self.name = name
        # A None sentinel replaces the old mutable default list: that list
        # (and the estimator instances in it) was created once and shared by
        # every Learner, so estimators with internal state (for example
        # LocalCrossValidation, which adapts its k) leaked state between
        # unrelated learners.
        if estimators is None:
            estimators = [SensitivityAnalysis(),
                          LocalCrossValidation(),
                          BaggingVarianceCNeighbours(),
                          Mahalanobis(),
                          MahalanobisToCenter()]
        self.estimators = estimators
        self.box_learner = box_learner
        self.blending = False
    
    def __call__(self, instances, weight=None, **kwds):
        """Learn from the given table of data instances.
        
        :param instances: Data instances to learn from.
        :type instances: Orange.data.Table
        :param weight: Id of meta attribute with weights of instances
        :type weight: integer
        :rtype: :class:`Orange.evaluation.reliability.Classifier`
        """
        
        blending_classifier = None
        new_domain = None
        
        # Reliability estimation is only defined for regression problems.
        if instances.domain.class_var.var_type != Orange.data.variable.Continuous.Continuous:
            raise Exception("This method only works on data with continuous class.")
        
        return Classifier(instances, self.box_learner, self.estimators, self.blending, new_domain, blending_classifier)
    
    def internal_cross_validation(self, instances, folds=10):
        """ Perform the internal cross validation for getting the best
        reliability estimate. It uses the reliability estimators defined in
        estimators attribute.

        Returns the id of the method that scored the best.

        :param instances: Data instances to use for ICV.
        :type instances: :class:`Orange.data.Table`
        :param folds: number of folds for ICV.
        :type folds: int
        :rtype: int

        """
        res = Orange.evaluation.testing.cross_validation([self], instances, folds=folds)
        results = get_pearson_r(res)
        # Tuples sort by r first, so the last entry has the highest
        # correlation; element 3 is the method id.
        sorted_results = sorted(results)
        return sorted_results[-1][3]
    
    def internal_cross_validation_testing(self, instances, folds=10):
        """ Perform internal cross validation (as in Automatic selection of
        reliability estimates for individual regression predictions,
        Zoran Bosnic, 2010) and return id of the method
        that scored best on this data.

        :param instances: Data instances to use for ICV.
        :type instances: :class:`Orange.data.Table`
        :param folds: number of folds for ICV.
        :type folds: int
        :rtype: int

        """
        cv_indices = Orange.core.MakeRandomIndicesCV(instances, folds)
        
        list_of_rs = []
        
        # Summed Pearson r per method over all ICV folds.
        sum_of_rs = defaultdict(float)
        
        for fold in xrange(folds):
            data = instances.select(cv_indices, fold)
            # Tiny folds get leave-one-out instead of another CV split.
            if len(data) < 10:
                res = Orange.evaluation.testing.leave_one_out([self], data)
            else:
                res = Orange.evaluation.testing.cross_validation([self], data)
            results = get_pearson_r(res)
            for r, _, _, method in results:
                sum_of_rs[method] += r
        sorted_sum_of_rs = sorted(sum_of_rs.items(), key=lambda estimate: estimate[1], reverse=True)
        return sorted_sum_of_rs[0][0]
756
class Classifier:
    """
    A reliability estimation wrapper for classifiers.

    What distinguishes this classifier is that the returned probabilities (if
    :obj:`Orange.classification.Classifier.GetProbabilities` or
    :obj:`Orange.classification.Classifier.GetBoth` is passed) contain an
    additional attribute :obj:`reliability_estimate`, which is a list of
    :class:`~Orange.evaluation.reliability.Estimate` instances.

    """

    def __init__(self, instances, box_learner, estimators, blending, blending_domain, rf_classifier, **kwds):
        self.__dict__.update(kwds)
        self.instances = instances
        self.box_learner = box_learner
        self.estimators = estimators
        self.blending = blending
        self.blending_domain = blending_domain
        self.rf_classifier = rf_classifier
        
        # Train the learner with original data
        self.classifier = box_learner(instances)
        
        # Train all the estimators and create their classifiers
        self.estimation_classifiers = [estimator(instances, box_learner) for estimator in estimators]
    
    def __call__(self, instance, result_type=Orange.core.GetValue):
        """
        Classify and estimate reliability of estimation for a new instance.
        When :obj:`result_type` is set to
        :obj:`Orange.classification.Classifier.GetBoth` or
        :obj:`Orange.classification.Classifier.GetProbabilities`,
        an additional attribute :obj:`reliability_estimate`,
        which is a list of
        :class:`~Orange.evaluation.reliability.Estimate` instances,
        is added to the distribution object.
        
        :param instance: instance to be classified.
        :type instance: :class:`Orange.data.Instance`
        :param result_type: :class:`Orange.classification.Classifier.GetValue` or \
              :class:`Orange.classification.Classifier.GetProbabilities` or
              :class:`Orange.classification.Classifier.GetBoth`
        
        :rtype: :class:`Orange.data.Value`,
              :class:`Orange.statistics.Distribution` or a tuple with both
        """
        predicted, probabilities = self.classifier(instance, Orange.core.GetBoth)
        
        # Create a place holder for estimates when the base classifier did
        # not return a distribution.
        if probabilities is None:
            probabilities = Orange.statistics.distribution.Continuous()
        #with warnings.catch_warnings():
        #    warnings.simplefilter("ignore")
        probabilities.setattr('reliability_estimate', [])
        
        # Calculate all the estimates and add them to the results
        for estimate in self.estimation_classifiers:
            probabilities.reliability_estimate.extend(estimate(instance, predicted, probabilities))
        
        # Return the appropriate type of result
        if result_type == Orange.core.GetValue:
            return predicted
        elif result_type == Orange.core.GetProbabilities:
            return probabilities
        else:
            return predicted, probabilities
Note: See TracBrowser for help on using the repository browser.