source: orange/Orange/ensemble/boosting.py @ 9671:a7b056375472

Revision 9671:a7b056375472, 5.7 KB checked in by anze <anze.staric@…>, 2 years ago (diff)

Moved orange to Orange (part 2)

Line 
1import Orange
2import Orange.core as orange
3
# Finite stand-in for an infinite vote weight. BoostedLearner assigns it to a
# classifier whose weighted training error is zero, where the usual weight
# log((1 - error) / error) is undefined.
_inf = 100000
5
class BoostedLearner(orange.Learner):
    """
    Instead of drawing a series of bootstrap samples from the training set,
    boosting maintains a weight for each instance. When a classifier is
    trained from the training set, the weights of the correctly classified
    instances are decreased and all weights are renormalised, so the
    misclassified instances gain relative importance. Just like in a bagged
    learner, the class is decided based on voting of classifiers, but in
    boosting votes are weighted by accuracy obtained on training set.

    BoostedLearner is an implementation of AdaBoost.M1 (Freund and Schapire,
    1996). From user's viewpoint, the use of the BoostedLearner is similar to
    that of BaggedLearner. The learner passed as an argument needs to deal
    with instance weights.

    :param learner: learner to be boosted.
    :type learner: :class:`Orange.core.Learner`
    :param instances: optional data; when given, the learner is trained
        immediately and the resulting classifier is returned instead of the
        learner.
    :type instances: Orange.data.Table
    :param weightId: id of the meta attribute with the original instance
        weights (only used together with ``instances``).
    :type weightId: int
    :param t: number of boosted classifiers created from the instance set.
    :type t: int
    :param name: name of the resulting learner.
    :type name: str
    :rtype: :class:`Orange.ensemble.boosting.BoostedClassifier` or
            :class:`Orange.ensemble.boosting.BoostedLearner`
    """
    def __new__(cls, learner, instances=None, weightId=None, **kwargs):
        self = orange.Learner.__new__(cls, **kwargs)
        if instances is None:
            return self
        # The object returned below is a classifier, not an instance of cls,
        # so Python will not call __init__ for us. The method is already
        # bound: passing ``self`` again would shift ``learner`` into ``t``.
        self.__init__(learner, **kwargs)
        return self.__call__(instances, weightId)

    def __init__(self, learner, t=10, name='AdaBoost.M1'):
        self.t = t
        self.name = name
        self.learner = learner

    def __call__(self, instances, origWeight = 0):
        """
        Learn from the given table of data instances.

        A temporary meta attribute holding the boosting weights is added to
        ``instances`` for the duration of the call and removed before
        returning, also when learning fails with an exception.

        :param instances: data instances to learn from.
        :type instances: Orange.data.Table
        :param origWeight: id of the meta attribute with the initial instance
            weights; if it is false (0 or None) every instance starts with
            weight 1.0.
        :type origWeight: int
        :rtype: :class:`Orange.ensemble.boosting.BoostedClassifier`

        """
        import math
        weight = Orange.data.new_meta_id()
        if origWeight:
            for i in instances:
                i.setweight(weight, i.getweight(origWeight))
        else:
            instances.addMetaAttribute(weight, 1.0)

        classifiers = []
        try:
            for _ in range(self.t):
                classifier = self.learner(instances, weight)

                # Weighted training error of this round's classifier.
                epsilon = 0.0
                misclassified = []
                for ex in instances:
                    wrong = classifier(ex) != ex.getclass()
                    misclassified.append(wrong)
                    if wrong:
                        epsilon += ex.getweight(weight)
                epsilon = epsilon / float(
                    sum(ex.getweight(weight) for ex in instances))

                if epsilon == 0:
                    # Perfect classifier: log((1 - e) / e) is undefined.
                    vote = _inf
                elif epsilon >= 1:
                    # Everything misclassified: log(0) would raise a
                    # ValueError; use the finite stand-in for -infinity.
                    vote = -_inf
                else:
                    vote = math.log((1 - epsilon) / epsilon)
                    if vote == 0:
                        # epsilon == 0.5. The former ``and/or`` expression
                        # mapped this to _inf as well; kept so that a sole
                        # classifier never ends up with a zero vote weight.
                        vote = _inf
                classifiers.append((classifier, vote))

                if epsilon == 0 or epsilon >= 0.499:
                    # Stop boosting. A classifier that is no better than
                    # chance is dropped unless it is the only one we have.
                    if epsilon >= 0.499 and len(classifiers) > 1:
                        del classifiers[-1]
                    break

                # Shrink the weights of correctly classified instances ...
                beta = epsilon / (1 - epsilon)
                for ex, wrong in zip(instances, misclassified):
                    if not wrong:
                        ex.setweight(weight, ex.getweight(weight) * beta)
                # ... and renormalise so that the weights sum to 1.
                f = 1 / float(sum(ex.getweight(weight) for ex in instances))
                for ex in instances:
                    ex.setweight(weight, ex.getweight(weight) * f)
        finally:
            # Never leave the temporary weight attribute on the caller's data.
            instances.removeMetaAttribute(weight)

        return BoostedClassifier(classifiers=classifiers, name=self.name,
            classVar=instances.domain.classVar)
94
class BoostedClassifier(orange.Classifier):
    """
    An ensemble classifier that predicts by weighted voting of its member
    classifiers. It is normally produced by
    :class:`Orange.ensemble.boosting.BoostedLearner`, but can also be
    assembled by hand from the following parameters:

    :param classifiers: a list of ``(classifier, vote weight)`` pairs.
    :type classifiers: list

    :param name: name of the resulting classifier.
    :type name: str

    :param classVar: the class feature.
    :type classVar: :class:`Orange.data.variable.Variable`

    Any additional keyword arguments are stored as attributes.
    """

    def __init__(self, classifiers, name, classVar, **kwds):
        self.classifiers = classifiers
        self.name = name
        self.classVar = classVar
        # Extra keyword arguments go straight into the instance dictionary.
        self.__dict__.update(kwds)

    def __call__(self, instance, resultType=orange.GetValue):
        """
        Classify ``instance`` by summing the vote weights of the member
        classifiers per predicted class.

        :param instance: instance to be classified.
        :type instance: :class:`Orange.data.Instance`

        :param resultType: :class:`Orange.classification.Classifier.GetValue` or
              :class:`Orange.classification.Classifier.GetProbabilities` or
              :class:`Orange.classification.Classifier.GetBoth`

        :rtype: :class:`Orange.data.Value`,
              :class:`Orange.statistics.Distribution` or a tuple with both
        """
        tally = Orange.statistics.distribution.Discrete(self.classVar)
        for member, vote in self.classifiers:
            predicted = int(member(instance))
            tally[predicted] += vote

        # The winning class is chosen on the raw (unnormalised) votes.
        winner = Orange.data.Value(
            self.classVar, Orange.misc.selection.selectBestIndex(tally))
        if resultType == orange.GetValue:
            return winner

        # Turn the votes into relative shares for the remaining result types.
        total = sum(tally)
        for k in range(len(tally)):
            tally[k] = tally[k] / total

        if resultType == orange.GetProbabilities:
            return tally
        if resultType == orange.GetBoth:
            return (winner, tally)
        # Unrecognised result type: fall back to the predicted value.
        return winner

    def __reduce__(self):
        # Pickle support: rebuild through the constructor, then restore a
        # copy of the instance dictionary as state.
        constructor_args = (self.classifiers, self.name, self.classVar)
        state = dict(self.__dict__)
        return type(self), constructor_args, state
153   
Note: See TracBrowser for help on using the repository browser.