source: orange/Orange/ensemble/bagging.py @ 11630:5cfa71596edd

Revision 11630:5cfa71596edd, 6.4 KB checked in by Ales Erjavec <ales.erjavec@…>, 9 months ago (diff)

Fixed pickling of Boosted/Bagged/StackedLearner.

Line 
1import math
2import random
3
4import Orange.core as orange
5import Orange
6
7
class BaggedLearner(orange.Learner):
    """
    BaggedLearner takes a learner and returns a bagged learner, which is
    essentially a wrapper around the learner passed as an argument. If
    instances are passed in arguments, BaggedLearner returns a bagged
    classifier. Both learner and classifier then behave just like any
    other learner and classifier in Orange.

    Bagging, in essence, takes training data and a learner, and builds *t*
    classifiers, each time presenting a learner a bootstrap sample from the
    training data. When given a test instance, classifiers vote on class,
    and a bagged classifier returns a class with the highest number of votes.
    As implemented in Orange, when class probabilities are requested, these
    are proportional to the number of votes for a particular class.

    :param learner: learner to be bagged.
    :type learner: :class:`Orange.core.Learner`
    :param t: number of bagged classifiers, that is, classifiers created
        when instances are passed to bagged learner (default 10).
    :type t: int
    :param name: name of the resulting learner (default ``'Bagging'``).
    :type name: str
    :rtype: :class:`Orange.ensemble.bagging.BaggedClassifier` or
            :class:`Orange.ensemble.bagging.BaggedLearner`
    """
    def __new__(cls, learner, instances=None, weight_id=None, **kwargs):
        self = orange.Learner.__new__(cls, **kwargs)
        if instances is not None:
            # The object returned below is a classifier, not an instance of
            # this class, so Python will not run __init__ for us; do it by
            # hand. ``self.__init__`` is already bound, so ``self`` must not
            # be passed a second time (doing so would store the bagged
            # learner as its own base learner and the learner as ``t``).
            self.__init__(learner, **kwargs)
            return self.__call__(instances, weight_id)
        else:
            return self

    def __init__(self, learner, t=10, name='Bagging'):
        self.t = t
        self.name = name
        self.learner = learner

    def __call__(self, instances, weight=0):
        """
        Learn from the given table of data instances.

        Builds ``self.t`` classifiers, each trained on a bootstrap sample
        (``len(instances)`` indices drawn with replacement) of the data.
        The random generator is seeded with 0 on every call, so repeated
        calls on the same data draw the same samples.

        :param instances: data instances to learn from.
        :type instances: Orange.data.Table
        :param weight: ID of meta feature with weights of instances
        :type weight: int
        :rtype: :class:`Orange.ensemble.bagging.BaggedClassifier`

        """
        rng = random.Random()
        rng.seed(0)

        # The conversion does not depend on the round, so do it once
        # instead of once per bagged classifier.
        instances = Orange.data.Table(instances)
        n = len(instances)

        classifiers = []
        for _ in range(self.t):
            # Bootstrap sample: n row indices drawn with replacement.
            selection = [rng.randrange(n) for _ in range(n)]
            data = instances.getitems(selection)
            classifiers.append(self.learner(data, weight))
        return BaggedClassifier(classifiers=classifiers, name=self.name,
                                class_var=instances.domain.class_var)

    def __reduce__(self):
        # Pickle support: rebuild from the wrapped learner, then restore the
        # remaining attributes from a copy of the instance dictionary.
        return type(self), (self.learner,), dict(self.__dict__)

# Re-bind the class through Orange.utils.deprecated_members with a mapping of
# legacy camelCase names to the current snake_case ones (presumably so old
# call sites keep working -- confirm against Orange.utils).
BaggedLearner = Orange.utils.deprecated_members(
    {"weightId": "weight_id", "examples": "instances"}
    )(BaggedLearner)
78
79
class BaggedClassifier(orange.Classifier):
    """
    Ensemble classifier that combines the predictions of several bagged
    classifiers. It is normally produced by
    :class:`Orange.ensemble.bagging.BaggedLearner` rather than built by hand.

    When constructing the classifier manually, the following parameters can
    be passed:

    :param classifiers: a list of bagged classifiers.
    :type classifiers: list

    :param name: name of the resulting classifier.
    :type name: str

    :param class_var: the class feature.
    :type class_var: :class:`Orange.feature.Descriptor`

    Any additional keyword arguments are stored as instance attributes.
    """

    def __init__(self, classifiers, name, class_var, **kwds):
        self.classifiers = classifiers
        self.name = name
        self.class_var = class_var
        # Extra keyword arguments become plain instance attributes.
        self.__dict__.update(kwds)

    def __call__(self, instance, result_type=orange.GetValue):
        """
        Classify ``instance`` by combining the member classifiers.

        For a discrete class each member casts one vote and the value with
        the most votes wins; the returned distribution holds the share of
        votes per value. For a continuous class the member predictions are
        averaged, and the returned distribution accumulates every member's
        distribution with each entry scaled by one over the number of
        members. For any other class type, or an unrecognised
        ``result_type`` on a continuous class, nothing is returned.

        :param instance: instance to be classified.
        :type instance: :class:`Orange.data.Instance`

        :param result_type: :class:`Orange.classification.Classifier.GetValue`
              or :class:`Orange.classification.Classifier.GetProbabilities` or
              :class:`Orange.classification.Classifier.GetBoth`

        :rtype: :class:`Orange.data.Value`,
              :class:`Orange.statistics.Distribution` or a tuple with both
        """
        from collections import defaultdict

        kind = self.class_var.var_type

        if kind == Orange.feature.Type.Discrete:
            # One vote per member, tallied by class-value index.
            tally = [0.] * len(self.class_var.values)
            for member in self.classifiers:
                tally[int(member(instance))] += 1
            # list.index returns the first maximum, so ties go to the
            # lowest class index.
            winner = Orange.data.Value(self.class_var,
                                       tally.index(max(tally)))
            if result_type == orange.GetValue:
                return winner

            n_members = len(self.classifiers)
            shares = Orange.statistics.distribution.Discrete(
                [count / n_members for count in tally],
                variable=self.class_var)

            if result_type == orange.GetProbabilities:
                return shares
            if result_type == orange.GetBoth:
                return (winner, shares)
            return winner

        elif kind == Orange.feature.Type.Continuous:
            # Members are asked for both value and distribution whenever a
            # distribution is needed, since the mean is computed as well.
            request = result_type
            if result_type == orange.GetProbabilities:
                request = orange.GetBoth
            answers = [member(instance, request)
                       for member in self.classifiers]

            n_members = float(len(self.classifiers))
            if result_type in [orange.GetBoth, orange.GetProbabilities]:
                mean = sum([float(val) for val, _ in answers]) / n_members

                density = defaultdict(float)
                for _, dist in answers:
                    for point, mass in dist.items():
                        density[float(point)] += mass / n_members
                density = Orange.statistics.distribution.Continuous(
                    density, variable=self.class_var)

                if result_type == orange.GetBoth:
                    return (self.class_var(mean), density)
                return density

            elif result_type == orange.GetValue:
                mean = sum([float(val) for val in answers]) / n_members
                return self.class_var(mean)

    def __reduce__(self):
        # Pickle support: rebuild from the constructor arguments, then
        # restore the rest from a copy of the instance dictionary.
        ctor_args = (self.classifiers, self.name, self.class_var)
        return (type(self), ctor_args, dict(self.__dict__))

# Re-bind the class through Orange.utils.deprecated_members with a mapping of
# legacy camelCase names to the current snake_case ones (presumably so old
# call sites keep working -- confirm against Orange.utils).
BaggedClassifier = Orange.utils.deprecated_members(
    {"example": "instance", "classVar": "class_var",
     "resultType": "result_type"}
    )(BaggedClassifier)
Note: See TracBrowser for help on using the repository browser.