source: orange/orange/Orange/ensemble/bagging.py @ 8938:539c5cb1ec37

Revision 8938:539c5cb1ec37, 5.9 KB checked in by ales_erjavec <ales.erjavec@…>, 3 years ago (diff)

Sum the whole continuous distributions in BaggedClassifier.

Line 
1import math
2import random
3
4import Orange.core as orange
5import Orange
6
class BaggedLearner(orange.Learner):
    """
    BaggedLearner takes a learner and returns a bagged learner, which is
    essentially a wrapper around the learner passed as an argument. If
    instances are passed in arguments, BaggedLearner returns a bagged
    classifier. Both learner and classifier then behave just like any
    other learner and classifier in Orange.

    Bagging, in essence, takes training data and a learner, and builds *t*
    classifiers, each time presenting a learner a bootstrap sample from the
    training data. When given a test instance, classifiers vote on class,
    and a bagged classifier returns a class with the highest number of votes.
    As implemented in Orange, when class probabilities are requested, these
    are proportional to the number of votes for a particular class.

    :param learner: learner to be bagged.
    :type learner: :class:`Orange.core.Learner`
    :param t: number of bagged classifiers, that is, classifiers created
        when instances are passed to bagged learner.
    :type t: int
    :param name: name of the resulting learner.
    :type name: str
    :rtype: :class:`Orange.ensemble.bagging.BaggedClassifier` or
            :class:`Orange.ensemble.bagging.BaggedLearner`
    """
    def __new__(cls, learner, instances=None, weightId=None, **kwargs):
        """
        Create the learner or, when ``instances`` is given, train at once.

        :param learner: learner to be bagged.
        :param instances: optional data to learn from immediately.
        :param weightId: weight meta ID forwarded to :meth:`__call__`
            together with ``instances``.
        :param kwargs: forwarded to ``orange.Learner.__new__`` and to
            :meth:`__init__` (e.g. ``t``, ``name``).
        :rtype: the new learner when ``instances`` is None, otherwise the
            result of calling it on ``instances``.
        """
        self = orange.Learner.__new__(cls, **kwargs)
        if instances is not None:
            # Python will not run __init__ for us here, because the object
            # returned below is the classifier rather than an instance of
            # cls. ``self.__init__`` is already bound, so ``self`` must not
            # be passed again; doing so would shift every argument by one
            # (learner <- self, t <- learner).
            self.__init__(learner, **kwargs)
            return self.__call__(instances, weightId)
        else:
            return self

    def __init__(self, learner, t=10, name='Bagging'):
        """Store the wrapped learner, the ensemble size and the name."""
        self.t = t
        self.name = name
        self.learner = learner

    def __call__(self, instances, weight=0):
        """
        Learn from the given table of data instances.

        ``self.t`` bootstrap samples (drawn with replacement, each the size
        of ``instances``) are generated and ``self.learner`` is trained on
        every one of them. The random generator is seeded with 0 on each
        call, so repeated calls draw the same sequence of sample indices.

        :param instances: data instances to learn from.
        :type instances: Orange.data.Table
        :param weight: ID of meta feature with weights of instances
        :type weight: int
        :rtype: :class:`Orange.ensemble.bagging.BaggedClassifier`

        """
        rng = random.Random(0)

        n = len(instances)
        # Wrap the data once; the wrapping does not depend on the round, so
        # there is no reason to repeat it for every bootstrap sample.
        instances = Orange.data.Table(instances)
        classifiers = []
        for _round in range(self.t):
            # n indices drawn uniformly with replacement from [0, n).
            selection = [rng.randrange(n) for _draw in range(n)]
            data = instances.getitems(selection)
            classifiers.append(self.learner(data, weight))
        return BaggedClassifier(classifiers=classifiers, name=self.name,
                                classVar=instances.domain.classVar)
70
class BaggedClassifier(orange.Classifier):
    """
    A classifier that uses a bagging technique. Usually the learner
    (:class:`Orange.ensemble.bagging.BaggedLearner`) is used to construct the
    classifier.

    When constructing the classifier manually, the following parameters can
    be passed:

    :param classifiers: a list of bagged classifiers.
    :type classifiers: list

    :param name: name of the resulting classifier.
    :type name: str

    :param classVar: the class feature.
    :type classVar: :class:`Orange.data.variable.Variable`

    """

    def __init__(self, classifiers, name, classVar, **kwds):
        """Store the ensemble members, name and class variable; any extra
        keyword arguments are set as instance attributes."""
        self.classifiers = classifiers
        self.name = name
        self.classVar = classVar
        self.__dict__.update(kwds)

    def __call__(self, instance, resultType = orange.GetValue):
        """
        Classify ``instance`` by combining the predictions of all members.

        For a discrete class every member casts one vote; the value with
        the most votes wins and the probabilities are the vote shares.
        For a continuous class the predicted value is the mean of the
        members' predictions and the distribution is the sum of the
        members' distributions, each scaled by 1 / number of members.

        :param instance: instance to be classified.
        :type instance: :class:`Orange.data.Instance`

        :param resultType: :class:`Orange.classification.Classifier.GetValue` or \
              :class:`Orange.classification.Classifier.GetProbabilities` or
              :class:`Orange.classification.Classifier.GetBoth`

        :rtype: :class:`Orange.data.Value`,
              :class:`Orange.statistics.Distribution` or a tuple with both
        """
        var_type = self.classVar.varType

        if var_type == Orange.data.Type.Discrete:
            # One vote per member, indexed by the predicted value.
            counts = [0.] * len(self.classVar.values)
            for member in self.classifiers:
                counts[int(member(instance))] += 1
            winner = Orange.data.Value(self.classVar,
                                       counts.index(max(counts)))
            if resultType == orange.GetValue:
                return winner
            # Turn the vote counts into relative frequencies.
            shares = [count / len(self.classifiers) for count in counts]
            dist = Orange.statistics.distribution.Discrete(shares)
            if resultType == orange.GetProbabilities:
                return dist
            if resultType == orange.GetBoth:
                return (winner, dist)
            # Any other result type falls back to the plain value.
            return winner

        if var_type == Orange.data.Type.Continuous:
            # Members are asked for both value and distribution even when
            # only probabilities were requested, so the (value, dist)
            # unpacking below works for either case.
            if resultType == orange.GetProbabilities:
                member_request = orange.GetBoth
            else:
                member_request = resultType
            votes = [member(instance, member_request)
                     for member in self.classifiers]
            wsum = float(len(self.classifiers))

            if resultType in (orange.GetBoth, orange.GetProbabilities):
                from collections import defaultdict
                pred = sum([float(val) for val, dist in votes]) / wsum
                # Accumulate every member's distribution, scaled by 1/wsum.
                prob = defaultdict(float)
                for _val, member_dist in votes:
                    for point, weight in member_dist.items():
                        prob[float(point)] += weight / wsum

                prob = Orange.statistics.distribution.Continuous(prob)
                if resultType == orange.GetBoth:
                    return (self.classVar(pred), prob)
                return prob

            if resultType == orange.GetValue:
                pred = sum([float(val) for val in votes]) / wsum
                return self.classVar(pred)

    def __reduce__(self):
        """Pickle support: rebuild from the constructor arguments and
        restore a copy of the instance ``__dict__`` as state."""
        ctor_args = (self.classifiers, self.name, self.classVar)
        state = dict(self.__dict__)
        return type(self), ctor_args, state
150   
Note: See TracBrowser for help on using the repository browser.