#
source:
orange/Orange/classification/bayes.py
@
11459:fc07a5c346be

Revision 11459:fc07a5c346be, 8.1 KB checked in by Ales Erjavec <ales.erjavec@…>, 12 months ago (diff) |
---|

Rev | Line | |
---|---|---|

[8042] | 1 | import Orange |

[8862] | 2 | import Orange.core |

[8042] | 3 | from Orange.core import BayesClassifier as _BayesClassifier |

4 | from Orange.core import BayesLearner as _BayesLearner | |

5 | ||

6 | ||

class NaiveLearner(Orange.classification.Learner):
    """
    Probabilistic classifier based on applying Bayes' theorem (from Bayesian
    statistics) with strong (naive) independence assumptions. Constructor
    parameters set the corresponding attributes.

    .. attribute:: adjust_threshold

        If set and the class is binary, the classifier's
        threshold will be set so as to optimize the classification accuracy.
        The threshold is tuned by observing the probabilities predicted on
        learning data. Setting it to True can increase the
        accuracy considerably.

    .. attribute:: m

        m for m-estimate. If set, m-estimation of probabilities
        will be used using :class:`~Orange.statistics.estimate.M`.
        If :obj:`estimator_constructor` is set as well, the value of ``m``
        is assigned to that constructor's ``m`` attribute; an
        :obj:`AttributeError` is raised when the constructor has no such
        attribute.

    .. attribute:: estimator_constructor

        Probability estimator constructor for
        prior class probabilities. Defaults to
        :class:`~Orange.statistics.estimate.RelativeFrequency`.
        When this attribute is set, no m-estimate constructor is created
        from :obj:`m`; a non-zero :obj:`m` is instead passed on to the
        given constructor (see :obj:`m`).

    .. attribute:: conditional_estimator_constructor

        Probability estimator constructor
        for conditional probabilities for discrete features. If omitted,
        the estimator for prior probabilities will be used.

    .. attribute:: conditional_estimator_constructor_continuous

        Probability estimator constructor for conditional probabilities for
        continuous features. Defaults to
        :class:`~Orange.statistics.estimate.Loess`.
    """

    def __new__(cls, data=None, weight_id=0, **argkw):
        """Create a learner or, when ``data`` is given, a fitted classifier.

        :param data: Optional data instances. If not None, the new learner
            is initialised with ``argkw`` and immediately called on ``data``;
            the result of that call is returned instead of the learner.
        :param weight_id: Passed as the ``weight`` argument of
            :obj:`__call__` when ``data`` is given.
        :param argkw: Keyword arguments forwarded to the base ``__new__``
            and, when ``data`` is given, to :obj:`__init__`.
        """
        self = Orange.classification.Learner.__new__(cls, **argkw)
        if data is not None:
            # The object returned below is not the learner itself, so
            # __init__ has to be invoked explicitly before learning.
            self.__init__(**argkw)
            return self.__call__(data, weight_id)
        else:
            return self

    def __init__(self, adjust_threshold=False, m=0, estimator_constructor=None,
                 conditional_estimator_constructor=None,
                 conditional_estimator_constructor_continuous=None, **argkw):
        """Store the learner settings; see the class docstring for their
        meaning. Any additional keyword arguments are stored as instance
        attributes as well.
        """
        self.adjust_threshold = adjust_threshold
        self.m = m
        self.estimator_constructor = estimator_constructor
        self.conditional_estimator_constructor = conditional_estimator_constructor
        self.conditional_estimator_constructor_continuous = conditional_estimator_constructor_continuous
        self.__dict__.update(argkw)

    def __call__(self, data, weight=0):
        """Learn from the given table of data instances.

        :param data: Data instances to learn from.
        :type data: :class:`~Orange.data.Table`
        :param weight: Id of meta attribute with weights of instances
        :type weight: int
        :rtype: :class:`~Orange.classification.bayes.NaiveClassifier`
        :raises AttributeError: if both :obj:`m` and
            :obj:`estimator_constructor` are set but the estimator
            constructor has no ``m`` attribute.
        """
        bayes = _BayesLearner()

        # Prior probability estimator: an explicit constructor wins; ``m``
        # alone selects the m-estimate constructor.
        if self.estimator_constructor:
            bayes.estimator_constructor = self.estimator_constructor
            if self.m:
                if not hasattr(bayes.estimator_constructor, "m"):
                    raise AttributeError("invalid combination of attributes: 'estimator_constructor' does not expect 'm'")
                else:
                    self.estimator_constructor.m = self.m
        elif self.m:
            bayes.estimator_constructor = Orange.core.ProbabilityEstimatorConstructor_m(m=self.m)

        # Conditional estimator for discrete features: fall back to the
        # prior estimator constructor (applied by rows) when none is given.
        if self.conditional_estimator_constructor:
            bayes.conditional_estimator_constructor = self.conditional_estimator_constructor
        elif bayes.estimator_constructor:
            bayes.conditional_estimator_constructor = Orange.core.ConditionalProbabilityEstimatorConstructor_ByRows()
            bayes.conditional_estimator_constructor.estimator_constructor = bayes.estimator_constructor

        if self.conditional_estimator_constructor_continuous:
            bayes.conditional_estimator_constructor_continuous = self.conditional_estimator_constructor_continuous
        if self.adjust_threshold:
            bayes.adjust_threshold = self.adjust_threshold

        return NaiveClassifier(bayes(data, weight))

# Wrap the class with Orange.utils.deprecated_members, handing it the
# mapping from the old camelCase member names to the current snake_case ones.
NaiveLearner = Orange.utils.deprecated_members(dict(
    adjustThreshold="adjust_threshold",
    estimatorConstructor="estimator_constructor",
    conditionalEstimatorConstructor="conditional_estimator_constructor",
    conditionalEstimatorConstructorContinuous="conditional_estimator_constructor_continuous",
    weightID="weight_id",
))(NaiveLearner)

[8042] | 101 | |

102 | ||

class NaiveClassifier(Orange.classification.Classifier):
    """
    Predictor based on calculated probabilities.

    .. attribute:: distribution

        Stores probabilities of classes, i.e. p(C) for each class C.

    .. attribute:: estimator

        An object that returns a probability of class p(C) for a given class C.

    .. attribute:: conditional_distributions

        A list of conditional probabilities.

    .. attribute:: conditional_estimators

        A list of estimators for conditional probabilities.

    .. attribute:: adjust_threshold

        For binary classes, this tells the learner to
        determine the optimal threshold probability according to 0-1
        loss on the training set. For multiple class problems, it has
        no effect.
    """

    def __init__(self, base_classifier=None):
        """Wrap a native Bayes classifier.

        :param base_classifier: the underlying classifier to delegate to;
            a new, empty ``_BayesClassifier`` is created when None.
        """
        # Test for None explicitly so that a supplied classifier is never
        # replaced merely because it happens to evaluate as false.
        if base_classifier is None:
            base_classifier = _BayesClassifier()
        self.native_bayes_classifier = base_classifier
        # Mirror the wrapped classifier's attributes on this object.
        for k, v in self.native_bayes_classifier.__dict__.items():
            self.__dict__[k] = v

    def __call__(self, instance, result_type=Orange.classification.Classifier.GetValue,
                 *args, **kwdargs):
        """Classify a new instance.

        :param instance: instance to be classified.
        :type instance: :class:`~Orange.data.Instance`
        :param result_type: :class:`~Orange.classification.Classifier.GetValue` or
            :class:`~Orange.classification.Classifier.GetProbabilities` or
            :class:`~Orange.classification.Classifier.GetBoth`

        :rtype: :class:`~Orange.data.Value`,
            :class:`~Orange.statistics.distribution.Distribution` or a
            tuple with both
        """
        return self.native_bayes_classifier(instance, result_type, *args, **kwdargs)

    def __setattr__(self, name, value):
        """Set an attribute, keeping the wrapped classifier in sync.

        The value is always stored on this object; if the wrapped
        classifier already has an attribute of that name in its
        ``__dict__``, it is updated there too.
        """
        if name == "native_bayes_classifier":
            # The wrapped classifier itself is stored directly; the lookup
            # below needs it to exist already.
            self.__dict__[name] = value
            return
        if name in self.native_bayes_classifier.__dict__:
            self.native_bayes_classifier.__dict__[name] = value
        self.__dict__[name] = value

    def p(self, class_, instance):
        """
        Return probability of a single class.
        Probability is not normalized and can be different from probability
        returned from __call__.

        :param class_: class value for which the probability should be
            output.
        :type class_: :class:`~Orange.data.Value`
        :param instance: instance to be classified.
        :type instance: :class:`~Orange.data.Instance`

        """
        return self.native_bayes_classifier.p(class_, instance)

    def __str__(self):
        """Return classifier in human friendly format.

        The text consists of a header row with the class values, a row of
        class probabilities and, for every discrete attribute in
        :obj:`conditional_distributions`, a table with one row per
        attribute value.
        """
        class_values = self.class_var.values
        nvalues = len(class_values)
        frmtStr = ' %10.3f' * nvalues
        classes = " " * 20 + ((' %10s' * nvalues) % tuple(v[:10] for v in class_values))

        sections = []
        for dist in self.conditional_distributions:
            variable = dist.variable
            # Only discrete attributes are included in the printout.
            if variable.var_type == variable.Discrete:
                rows = "\n".join(
                    ("%20s" % value[:20]) + (frmtStr % tuple(dist[index]))
                    for index, value in enumerate(variable.values))
                sections.append("\n".join([
                    "Attribute " + variable.name,
                    classes,
                    rows]))

        return "\n".join([
            classes,
            "class probabilities " + (frmtStr % tuple(self.distribution)),
            "",
            "\n\n".join(sections)])

[8042] | 194 | |

195 | ||

def printModel(model):
    """Print ``model``, wrapped in a :class:`NaiveClassifier`, to standard
    output using the classifier's ``__str__`` format.

    :param model: classifier object passed to the :class:`NaiveClassifier`
        constructor as its base classifier.
    """
    # Parenthesised single-argument form prints the same text whether
    # ``print`` is a statement or a function.
    print(NaiveClassifier(model))

**Note:** See TracBrowser for help on using the repository browser.