source: orange/orange/Orange/classification/bayes.py @ 9635:f7279fdd2d42

Revision 9635:f7279fdd2d42, 8.1 KB checked in by anze <anze.staric@…>, 2 years ago (diff)

Improved bayes documentation.

Line 
1import Orange
2import Orange.core
3from Orange.core import BayesClassifier as _BayesClassifier
4from Orange.core import BayesLearner as _BayesLearner
5
6
class NaiveLearner(Orange.classification.Learner):
    """
    Probabilistic classifier based on applying Bayes' theorem (from Bayesian
    statistics) with strong (naive) independence assumptions. Constructor
    parameters set the corresponding attributes.

    .. attribute:: adjust_threshold

        If set and the class is binary, the classifier's
        threshold will be set as to optimize the classification accuracy.
        The threshold is tuned by observing the probabilities predicted on
        learning data. Setting it to True can increase the
        accuracy considerably.

    .. attribute:: m

        m for m-estimate. If set, m-estimation of probabilities
        will be used using :class:`~Orange.statistics.estimate.M`.
        If :obj:`estimator_constructor` is set as well, no m-estimator is
        created; instead the value is assigned to that constructor's ``m``
        attribute, and :exc:`AttributeError` is raised when the constructor
        has no such attribute.

    .. attribute:: estimator_constructor

        Probability estimator constructor for
        prior class probabilities. Defaults to
        :class:`~Orange.statistics.estimate.RelativeFrequency`.
        When set, it takes precedence over the m-estimator that :obj:`m`
        alone would create (see :obj:`m` for how the two interact).

    .. attribute:: conditional_estimator_constructor

        Probability estimator constructor
        for conditional probabilities for discrete features. If omitted,
        the estimator for prior probabilities will be used.

    .. attribute:: conditional_estimator_constructor_continuous

        Probability estimator constructor for conditional probabilities for
        continuous features. Defaults to
        :class:`~Orange.statistics.estimate.Loess`.
    """

    def __new__(cls, data=None, weight_id=0, **argkw):
        """Create a learner, or directly a classifier when `data` is given.

        :param data: optional data instances; if given (and truthy) the
            learner is initialized from `argkw` and immediately applied.
        :param weight_id: id of meta attribute with weights of instances.
        :returns: the learner itself, or the result of calling it on `data`.
        """
        self = Orange.classification.Learner.__new__(cls, **argkw)
        if data:
            # The value returned below is the result of __call__, not the
            # learner, so the learner is initialized explicitly here.
            self.__init__(**argkw)
            return self.__call__(data, weight_id)
        else:
            return self

    def __init__(self, adjust_threshold=False, m=0, estimator_constructor=None,
                 conditional_estimator_constructor=None,
                 conditional_estimator_constructor_continuous=None, **argkw):
        """Store the settings; any extra keyword becomes an attribute."""
        self.adjust_threshold = adjust_threshold
        self.m = m
        self.estimator_constructor = estimator_constructor
        self.conditional_estimator_constructor = conditional_estimator_constructor
        self.conditional_estimator_constructor_continuous = conditional_estimator_constructor_continuous
        self.__dict__.update(argkw)

    def __call__(self, data, weight=0):
        """Learn from the given table of data instances.

        :param data: Data instances to learn from.
        :type data: :class:`~Orange.data.Table`
        :param weight: Id of meta attribute with weights of instances
        :type weight: int
        :rtype: :class:`~Orange.classification.bayes.NaiveClassifier`
        :raises AttributeError: if both :obj:`m` and
            :obj:`estimator_constructor` are set but the estimator
            constructor has no ``m`` attribute.
        """
        bayes = _BayesLearner()

        # Prior-probability estimator: an explicit constructor wins; `m`
        # alone selects an m-estimate constructor.
        if self.estimator_constructor:
            bayes.estimator_constructor = self.estimator_constructor
            if self.m:
                if not hasattr(bayes.estimator_constructor, "m"):
                    raise AttributeError("invalid combination of attributes: 'estimator_constructor' does not expect 'm'")
                else:
                    self.estimator_constructor.m = self.m
        elif self.m:
            bayes.estimator_constructor = Orange.core.ProbabilityEstimatorConstructor_m(m=self.m)

        # Conditional estimator for discrete features: fall back to a by-rows
        # constructor built around the prior estimator when none was given.
        if self.conditional_estimator_constructor:
            bayes.conditional_estimator_constructor = self.conditional_estimator_constructor
        elif bayes.estimator_constructor:
            bayes.conditional_estimator_constructor = Orange.core.ConditionalProbabilityEstimatorConstructor_ByRows()
            bayes.conditional_estimator_constructor.estimator_constructor = bayes.estimator_constructor

        if self.conditional_estimator_constructor_continuous:
            bayes.conditional_estimator_constructor_continuous = self.conditional_estimator_constructor_continuous
        if self.adjust_threshold:
            bayes.adjust_threshold = self.adjust_threshold
        return NaiveClassifier(bayes(data, weight))


# Legacy camelCase names paired with their snake_case replacements
# (presumably old -> new; confirm against Orange.misc.deprecated_members).
NaiveLearner = Orange.misc.deprecated_members(
{     "adjustThreshold": "adjust_threshold",
      "estimatorConstructor": "estimator_constructor",
      "conditionalEstimatorConstructor": "conditional_estimator_constructor",
      "conditionalEstimatorConstructorContinuous": "conditional_estimator_constructor_continuous",
      "weightID": "weight_id"
}, in_place=True)(NaiveLearner)
101
102
class NaiveClassifier(Orange.classification.Classifier):
    """
    Predictor based on calculated probabilities.

    .. attribute:: distribution

        Stores probabilities of classes, i.e. p(C) for each class C.

    .. attribute:: estimator

        An object that returns a probability of class p(C) for a given class C.

    .. attribute:: conditional_distributions

        A list of conditional probabilities.

    .. attribute:: conditional_estimators

        A list of estimators for conditional probabilities.

    .. attribute:: adjust_threshold

        For binary classes, this tells the learner to
        determine the optimal threshold probability according to 0-1
        loss on the training set. For multiple class problems, it has
        no effect.
    """

    def __init__(self, base_classifier=None):
        """Wrap `base_classifier`, or a fresh native classifier if none is given.

        :param base_classifier: native Bayes classifier to delegate to.
        """
        if not base_classifier:
            base_classifier = _BayesClassifier()
        self.native_bayes_classifier = base_classifier
        # Mirror the wrapped classifier's instance attributes on the wrapper;
        # writing to __dict__ directly bypasses the custom __setattr__.
        self.__dict__.update(self.native_bayes_classifier.__dict__)

    def __call__(self, instance, result_type=Orange.classification.Classifier.GetValue,
                 *args, **kwdargs):
        """Classify a new instance.

        :param instance: instance to be classified.
        :type instance: :class:`~Orange.data.Instance`
        :param result_type: :class:`~Orange.classification.Classifier.GetValue` or
              :class:`~Orange.classification.Classifier.GetProbabilities` or
              :class:`~Orange.classification.Classifier.GetBoth`

        :rtype: :class:`~Orange.data.Value`,
              :class:`~Orange.statistics.distribution.Distribution` or a
              tuple with both
        """
        return self.native_bayes_classifier(instance, result_type, *args, **kwdargs)

    def __setattr__(self, name, value):
        """Set an attribute, keeping the wrapped classifier's copy in sync.

        The reference to the wrapped classifier itself is stored only on the
        wrapper. Any other name is stored on the wrapper and, when the
        wrapped classifier's ``__dict__`` already has that name, there too.
        """
        if name == "native_bayes_classifier":
            self.__dict__[name] = value
            return
        if name in self.native_bayes_classifier.__dict__:
            self.native_bayes_classifier.__dict__[name] = value
        self.__dict__[name] = value

    def p(self, class_, instance):
        """
        Return probability of a single class.
        Probability is not normalized and can be different from probability
        returned from __call__.

        :param class_: class value for which the probability should be
                output.
        :type class_: :class:`~Orange.data.Value`
        :param instance: instance to be classified.
        :type instance: :class:`~Orange.data.Instance`

        """
        return self.native_bayes_classifier.p(class_, instance)

    def __str__(self):
        """Return classifier in human friendly format.

        The text holds a header row of class values, the class
        probabilities, and one table per discrete attribute with a row for
        each attribute value.
        """
        class_values = self.class_var.values
        nvalues = len(class_values)
        number_format = ' %10.3f' * nvalues
        # Header row: 20 blanks for the label column, then class names
        # truncated to 10 characters.
        classes = " " * 20 + ((' %10s' * nvalues) % tuple([c[:10] for c in class_values]))

        def attribute_table(cond_dist):
            # One table: attribute name, class header, one row per value.
            variable = cond_dist.variable
            rows = "\n".join(
                ("%20s" % value[:20]) + (number_format % tuple(cond_dist[index]))
                for index, value in enumerate(variable.values))
            return "\n".join(["Attribute " + variable.name, classes, rows])

        tables = [attribute_table(dist)
                  for dist in self.conditional_distributions
                  if dist.variable.var_type == dist.variable.Discrete]

        return "\n".join([
            classes,
            "class probabilities " + (number_format % tuple(self.distribution)),
            "",
            "\n\n".join(tables)])
194           
195
def printModel(model):
    """Print `model` in the human friendly format of :class:`NaiveClassifier`.

    :param model: classifier to wrap in :class:`NaiveClassifier` for printing.
    """
    # Single-argument call form prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print(NaiveClassifier(model))
Note: See TracBrowser for help on using the repository browser.