source: orange/Orange/ensemble/bagging.py @ 9923:5f55c90e0077

Revision 9923:5f55c90e0077, 6.2 KB checked in by markotoplak, 2 years ago (diff)

data.variable -> feature.

Line 
1import math
2import random
3
4import Orange.core as orange
5import Orange
6
class BaggedLearner(orange.Learner):
    """
    BaggedLearner takes a learner and returns a bagged learner, which is
    essentially a wrapper around the learner passed as an argument. If
    instances are passed in arguments, BaggedLearner returns a bagged
    classifier. Both learner and classifier then behave just like any
    other learner and classifier in Orange.

    Bagging, in essence, takes training data and a learner, and builds *t*
    classifiers, each time presenting a learner a bootstrap sample from the
    training data. When given a test instance, classifiers vote on class,
    and a bagged classifier returns a class with the highest number of votes.
    As implemented in Orange, when class probabilities are requested, these
    are proportional to the number of votes for a particular class.

    :param learner: learner to be bagged.
    :type learner: :class:`Orange.core.Learner`
    :param instances: optional training data; when given, the constructor
        trains immediately and returns the classifier instead of the learner.
    :type instances: :class:`Orange.data.Table`
    :param weight_id: ID of meta feature with weights of instances; only
        used when `instances` is given.
    :type weight_id: int
    :param t: number of bagged classifiers, that is, classifiers created
        when instances are passed to bagged learner.
    :type t: int
    :param name: name of the resulting learner.
    :type name: str
    :rtype: :class:`Orange.ensemble.bagging.BaggedClassifier` or
            :class:`Orange.ensemble.bagging.BaggedLearner`
    """
    def __new__(cls, learner, instances=None, weight_id=None, **kwargs):
        self = orange.Learner.__new__(cls, **kwargs)
        if instances is None:
            # Plain construction: Python calls __init__ on the returned
            # learner automatically.
            return self
        # The object returned below is a classifier, not an instance of
        # cls, so Python will not call __init__ for us; do it by hand.
        # __init__ is already bound to self, so self must not be passed
        # again (doing so would shift `learner` into the `t` parameter).
        self.__init__(learner, **kwargs)
        return self.__call__(instances, weight_id)

    def __init__(self, learner, t=10, name='Bagging'):
        self.t = t
        self.name = name
        self.learner = learner

    def __call__(self, instances, weight=0):
        """
        Learn from the given table of data instances.

        :param instances: data instances to learn from.
        :type instances: Orange.data.Table
        :param weight: ID of meta feature with weights of instances
        :type weight: int
        :rtype: :class:`Orange.ensemble.bagging.BaggedClassifier`

        """
        # Fixed seed: every call draws the same sequence of bootstrap
        # samples for data of a given size.
        r = random.Random()
        r.seed(0)

        n = len(instances)
        # Converting to a table is loop-invariant, so do it once up front.
        instances = Orange.data.Table(instances)
        classifiers = []
        for _ in range(self.t):
            # Bootstrap sample: n indices drawn uniformly with replacement.
            selection = [r.randrange(n) for _ in range(n)]
            data = instances.getitems(selection)
            classifiers.append(self.learner(data, weight))
        return BaggedClassifier(classifiers=classifiers, name=self.name,
                                class_var=instances.domain.class_var)
BaggedLearner = Orange.misc.deprecated_members({"weightId":"weight_id", "examples":"instances"})(BaggedLearner)
71
class BaggedClassifier(orange.Classifier):
    """
    A classifier that combines the predictions of several classifiers
    obtained by bagging. It is normally produced by
    :class:`Orange.ensemble.bagging.BaggedLearner` rather than built by hand.

    When constructing the classifier manually, the following parameters can
    be passed:

    :param classifiers: a list of bagged classifiers.
    :type classifiers: list

    :param name: name of the resulting classifier.
    :type name: str

    :param class_var: the class feature.
    :type class_var: :class:`Orange.feature.Descriptor`

    Any additional keyword arguments are stored as instance attributes.

    """

    def __init__(self, classifiers, name, class_var, **kwds):
        self.classifiers = classifiers
        self.name = name
        self.class_var = class_var
        # Extra keyword arguments become plain attributes of the instance.
        self.__dict__.update(kwds)

    def __call__(self, instance, result_type = orange.GetValue):
        """
        Classify an instance by combining the member classifiers.

        For a discrete class the members vote and the most voted value wins
        (the first one on ties); probabilities are the vote shares. For a
        continuous class the member predictions are averaged, and the
        returned distribution is the average of the member distributions.

        :param instance: instance to be classified.
        :type instance: :class:`Orange.data.Instance`

        :param result_type: :class:`Orange.classification.Classifier.GetValue` or \
              :class:`Orange.classification.Classifier.GetProbabilities` or
              :class:`Orange.classification.Classifier.GetBoth`

        :rtype: :class:`Orange.data.Value`,
              :class:`Orange.statistics.Distribution` or a tuple with both
        """
        var = self.class_var
        if var.var_type == Orange.feature.Type.Discrete:
            # One vote per member, tallied by class-value index.
            counts = [0.] * len(var.values)
            for member in self.classifiers:
                counts[int(member(instance))] += 1
            winner = Orange.data.Value(var, counts.index(max(counts)))
            if result_type == orange.GetValue:
                return winner

            # Turn vote counts into vote shares.
            total = len(self.classifiers)
            shares = Orange.statistics.distribution.Discrete(
                [count / total for count in counts])
            if result_type == orange.GetProbabilities:
                return shares
            if result_type == orange.GetBoth:
                return (winner, shares)
            # Any other result type is answered with the value alone.
            return winner

        elif var.var_type == Orange.feature.Type.Continuous:
            # Members are asked for both value and distribution even when
            # only probabilities were requested.
            if result_type == orange.GetProbabilities:
                member_type = orange.GetBoth
            else:
                member_type = result_type
            votes = [member(instance, member_type)
                     for member in self.classifiers]
            n_members = float(len(self.classifiers))

            if result_type in (orange.GetBoth, orange.GetProbabilities):
                mean = sum([float(val) for val, dist in votes]) / n_members
                from collections import defaultdict
                density = defaultdict(float)
                for _, dist in votes:
                    for val, val_p in dist.items():
                        density[float(val)] += val_p / n_members

                density = Orange.statistics.distribution.Continuous(density)
                if result_type == orange.GetBoth:
                    return (var(mean), density)
                return density
            elif result_type == orange.GetValue:
                mean = sum([float(vote) for vote in votes]) / n_members
                return var(mean)
        # Unrecognised class types or (for continuous classes) result types
        # fall through here and yield None.

    def __reduce__(self):
        # Pickle as: class, constructor arguments, and a copy of the
        # instance dictionary as state.
        ctor_args = (self.classifiers, self.name, self.class_var)
        state = dict(self.__dict__)
        return type(self), ctor_args, state
BaggedClassifier = Orange.misc.deprecated_members({"example":"instance", "classVar":"class_var","resultType":"result_type"})(BaggedClassifier)
Note: See TracBrowser for help on using the repository browser.