source: orange/Orange/ensemble/bagging.py @ 10963:44453b134698

Revision 10963:44453b134698, 6.3 KB checked in by Ales Erjavec <ales.erjavec@…>, 20 months ago (diff)

Set the 'distribution.variable' in BaggedClassifier and ProbabilityEstimator_loess

The distribution returned by BaggedClassifier.__call__ would raise an
'invalid distribution (no variable)' error when calling '.items()'. The same
applied to any classifier where ProbabilityEstimator_loess can be used (for
instance MajorityClassifier).

Line 
1import math
2import random
3
4import Orange.core as orange
5import Orange
6
7
class BaggedLearner(orange.Learner):
    """
    BaggedLearner takes a learner and returns a bagged learner, which is
    essentially a wrapper around the learner passed as an argument. If
    instances are passed in arguments, BaggedLearner returns a bagged
    classifier. Both learner and classifier then behave just like any
    other learner and classifier in Orange.

    Bagging, in essence, takes training data and a learner, and builds *t*
    classifiers, each time presenting a learner a bootstrap sample from the
    training data. When given a test instance, classifiers vote on class,
    and a bagged classifier returns a class with the highest number of votes.
    As implemented in Orange, when class probabilities are requested, these
    are proportional to the number of votes for a particular class.

    :param learner: learner to be bagged.
    :type learner: :class:`Orange.core.Learner`
    :param instances: optional training data; when given, the learner is
        trained immediately and a classifier is returned instead.
    :type instances: :class:`Orange.data.Table`
    :param weight_id: ID of meta feature with weights of instances; only
        used together with ``instances``.
    :type weight_id: int
    :param t: number of bagged classifiers, that is, classifiers created
        when instances are passed to bagged learner.
    :type t: int
    :param name: name of the resulting learner.
    :type name: str
    :rtype: :class:`Orange.ensemble.bagging.BaggedClassifier` or
            :class:`Orange.ensemble.bagging.BaggedLearner`
    """
    def __new__(cls, learner, instances=None, weight_id=None, **kwargs):
        self = orange.Learner.__new__(cls, **kwargs)
        if instances is None:
            # Plain construction: Python calls __init__ on the returned
            # instance itself.
            return self

        # A classifier (not an instance of cls) is returned below, so
        # Python will not call __init__ for us.  __init__ is accessed as a
        # bound method here, therefore `self` must NOT be passed again;
        # doing so shifted every argument by one (learner=self, t=learner).
        self.__init__(learner, **kwargs)
        # Fall back to __call__'s own default when no weight id was given.
        weight = 0 if weight_id is None else weight_id
        return self.__call__(instances, weight)

    def __init__(self, learner, t=10, name='Bagging'):
        self.t = t
        self.name = name
        self.learner = learner

    def __call__(self, instances, weight=0):
        """
        Learn from the given table of data instances.

        ``self.t`` bootstrap samples (each as large as the data, drawn with
        replacement) are taken and the wrapped learner is trained on every
        one of them. The random generator is seeded with 0 on each call, so
        repeated calls draw the same samples.

        :param instances: data instances to learn from.
        :type instances: Orange.data.Table
        :param weight: ID of meta feature with weights of instances
        :type weight: int
        :rtype: :class:`Orange.ensemble.bagging.BaggedClassifier`

        """
        rng = random.Random()
        rng.seed(0)

        n = len(instances)
        # Wrapping the data in a Table does not depend on the round, so do
        # it once instead of once per bagged classifier.
        instances = Orange.data.Table(instances)

        classifiers = []
        for _round in range(self.t):
            # n draws with replacement, in the same order as before.
            selection = [rng.randrange(n) for _draw in range(n)]
            data = instances.getitems(selection)
            classifiers.append(self.learner(data, weight))
        return BaggedClassifier(classifiers=classifiers, name=self.name,
                                class_var=instances.domain.class_var)
71
# Re-bind BaggedLearner through Orange.utils.deprecated_members, handing it
# an old-name -> new-name mapping (presumably so that legacy camelCase
# names keep resolving -- confirm against Orange.utils).
BaggedLearner = Orange.utils.deprecated_members({
    "examples": "instances",
    "weightId": "weight_id",
})(BaggedLearner)
75
76
class BaggedClassifier(orange.Classifier):
    """
    A classifier that combines the predictions of several classifiers
    built by bagging. It is normally produced by
    :class:`Orange.ensemble.bagging.BaggedLearner` rather than constructed
    directly.

    When constructing the classifier manually, the following parameters can
    be passed:

    :param classifiers: a list of bagged classifiers.
    :type classifiers: list

    :param name: name of the resulting classifier.
    :type name: str

    :param class_var: the class feature.
    :type class_var: :class:`Orange.feature.Descriptor`

    Any additional keyword arguments are stored as instance attributes.

    """

    def __init__(self, classifiers, name, class_var, **kwds):
        self.classifiers = classifiers
        self.name = name
        self.class_var = class_var
        self.__dict__.update(kwds)

    def __call__(self, instance, result_type=orange.GetValue):
        """
        Classify ``instance`` by combining the member classifiers.

        For a discrete class the members vote and the most voted value
        wins; probabilities are the vote shares. For a continuous class the
        member predictions are averaged and the member distributions are
        merged with equal weights.

        :param instance: instance to be classified.
        :type instance: :class:`Orange.data.Instance`

        :param result_type: :class:`Orange.classification.Classifier.GetValue`
              or :class:`Orange.classification.Classifier.GetProbabilities` or
              :class:`Orange.classification.Classifier.GetBoth`

        :rtype: :class:`Orange.data.Value`,
              :class:`Orange.statistics.Distribution` or a tuple with both
        """
        if self.class_var.var_type == Orange.feature.Type.Discrete:
            # One vote per member classifier, tallied by class index.
            counts = [0.] * len(self.class_var.values)
            for member in self.classifiers:
                counts[int(member(instance))] += 1
            # list.index returns the first maximum, so ties go to the
            # lowest class index.
            winner = Orange.data.Value(self.class_var,
                                       counts.index(max(counts)))
            if result_type == orange.GetValue:
                return winner

            n_members = len(self.classifiers)
            shares = [count / n_members for count in counts]
            dist = Orange.statistics.distribution.Discrete(
                shares, variable=self.class_var)

            if result_type == orange.GetProbabilities:
                return dist
            if result_type == orange.GetBoth:
                return (winner, dist)
            # Any other result type falls back to the plain value.
            return winner

        elif self.class_var.var_type == Orange.feature.Type.Continuous:
            # Members are asked for both value and distribution even when
            # only probabilities were requested.
            if result_type == orange.GetProbabilities:
                member_result_type = orange.GetBoth
            else:
                member_result_type = result_type
            votes = [member(instance, member_result_type)
                     for member in self.classifiers]

            n_members = float(len(self.classifiers))
            if result_type in (orange.GetBoth, orange.GetProbabilities):
                mean = sum([float(value) for value, _ in votes]) / n_members

                from collections import defaultdict
                merged = defaultdict(float)
                for _, member_dist in votes:
                    for val, val_p in member_dist.items():
                        merged[float(val)] += val_p / n_members

                dist = Orange.statistics.distribution.Continuous(
                    merged, variable=self.class_var)

                if result_type == orange.GetBoth:
                    return (self.class_var(mean), dist)
                return dist

            elif result_type == orange.GetValue:
                mean = sum([float(value) for value in votes]) / n_members
                return self.class_var(mean)

    def __reduce__(self):
        # Pickle support: rebuild from the constructor arguments, then
        # restore a copy of the instance dictionary as state.
        constructor_args = (self.classifiers, self.name, self.class_var)
        return (type(self), constructor_args, dict(self.__dict__))
165
# Re-bind BaggedClassifier through Orange.utils.deprecated_members, handing
# it an old-name -> new-name mapping (presumably so that legacy camelCase
# names keep resolving -- confirm against Orange.utils).
BaggedClassifier = Orange.utils.deprecated_members({
    "example": "instance",
    "classVar": "class_var",
    "resultType": "result_type",
})(BaggedClassifier)
Note: See TracBrowser for help on using the repository browser.