source: orange/Orange/feature/discretization.py @ 9813:93f1286f25e8

Revision 9813:93f1286f25e8, 6.0 KB checked in by blaz <blaz.zupan@…>, 2 years ago (diff)

Merge

Line 
1import Orange
2import Orange.core as orange
3
4from Orange.core import \
5    Discrete2Continuous, \
6    Discretizer, \
7    BiModalDiscretizer, \
8    EquiDistDiscretizer as EqualWidthDiscretizer, \
9    IntervalDiscretizer, \
10    ThresholdDiscretizer,\
11    EntropyDiscretization as Entropy, \
12    EquiDistDiscretization as EqualWidth, \
13    EquiNDiscretization as EqualFreq, \
14    BiModalDiscretization as BiModal, \
15    Discretization, \
16    Preprocessor_discretize
17
18
19
def entropyDiscretization_wrapper(data):
    """Discretize all continuous features in a class-labeled data set with
    the entropy-based discretization :obj:`Entropy`.

    :param data: data to discretize.
    :type data: Orange.data.Table
    :rtype: :obj:`Orange.data.Table` that includes the categorical and
        discretized continuous features from the original data table.

    After discretization, features that ended up with a single value
    (i.e. were discretized to one interval, a constant) are removed from
    the returned table. The class variable is always kept.

    """
    # Fixed seed so that repeated calls give the same result.
    orange.setrandseed(0)
    data_new = orange.Preprocessor_discretize(data, method=Entropy())

    # Keep only the features that still distinguish between examples.
    attrlist = [attr for attr in data_new.domain.attributes
                if len(attr.values) > 1]
    # The class variable is taken from the discretized table; the previous
    # code referenced an undefined name here and raised NameError.
    attrlist.append(data_new.domain.classVar)
    return data_new.select(attrlist)
46
47
class EntropyDiscretization_wrapper:
    """Simple callable wrapper around the function
    :obj:`entropyDiscretization_wrapper`.

    The class takes no construction arguments. An instance can be passed
    around and later called with a data set to discretize it::

        discretizer = Orange.feature.discretization.EntropyDiscretization_wrapper()
        disc_data = discretizer(table)

    """
    def __call__(self, data):
        """Return ``data`` discretized by :obj:`entropyDiscretization_wrapper`.

        :param data: data to discretize.
        :type data: Orange.data.Table
        """
        # The module-level function is named ``entropyDiscretization_wrapper``;
        # the previously used name ``entropyDiscretization`` does not exist.
        return entropyDiscretization_wrapper(data)
66
def DiscretizedLearner(baseLearner, examples=None, weight=0, **kwds):
    """Create a :obj:`DiscretizedLearner_Class` and optionally train it.

    :param baseLearner: learner that will be given the discretized data.
    :param examples: if given (and truthy), the new learner is called on
        it right away and the result of that call is returned instead of
        the learner.
    :param weight: passed as the second argument of the learner call when
        ``examples`` is given.
    :param kwds: forwarded unchanged to the
        :obj:`DiscretizedLearner_Class` constructor.
    """
    # Direct call syntax replaces the deprecated ``apply`` builtin.
    learner = DiscretizedLearner_Class(baseLearner, **kwds)
    if examples:
        return learner(examples, weight)
    return learner
71
class DiscretizedLearner_Class:
    """This class wraps a learner so that the data passed to it is
    discretized before learning. In this way we can prepare an object that
    learns without giving it the data up front and, for instance, use it in
    some standard testing procedure that repeats learning/testing on several
    data samples.

    :param baseLearner: learner to which the discretized data is given
    :type baseLearner: Orange.classification.Learner

    :param discretizer: a discretizer that converts continuous values into
      discrete. It is passed as ``method`` to ``Preprocessor_discretize``.
      Defaults to an instance of
      :obj:`Orange.feature.discretization.Entropy`.
    :type discretizer: Orange.feature.discretization.Discretization

    :param name: name to assign to learner (given as a keyword argument)
    :type name: string

    The data whose continuous features need to be discretized is given
    when the learner is called.

    An example on how such learner is set and used in ten-fold cross
    validation is given below::

        from Orange.feature import discretization
        bayes = Orange.classification.bayes.NaiveBayesLearner()
        disc = discretization.EqualFreq(n_intervals=10)
        dBayes = discretization.DiscretizedLearner(bayes, name='disc bayes')
        dbayes2 = discretization.DiscretizedLearner(bayes, name="EquiNBayes", discretizer=disc)
        results = Orange.evaluation.testing.CrossValidation([dBayes], table)
        classifier = discretization.DiscretizedLearner(bayes, examples=table)

    """
    # NOTE: the default ``Entropy()`` is created once, when the class is
    # defined, and is shared by every learner that does not pass its own
    # discretizer.
    def __init__(self, baseLearner, discretizer=Entropy(), **kwds):
        self.baseLearner = baseLearner
        if hasattr(baseLearner, "name"):
            self.name = baseLearner.name
        self.discretizer = discretizer
        # Extra keyword arguments (e.g. ``name``) become attributes and may
        # override the name copied from the base learner above.
        self.__dict__.update(kwds)

    def __call__(self, data, weight=None):
        """Discretize ``data``, train the base learner on it and return the
        resulting model wrapped in a :obj:`DiscretizedClassifier`.

        :param data: data to discretize and learn from.
        :param weight: if not None, passed to the base learner as its
            second argument.
        """
        # filter the data and then learn
        from Orange.preprocess import Preprocessor_discretize
        ddata = Preprocessor_discretize(data, method=self.discretizer)
        # Identity test: a weight of 0 is still forwarded to the learner.
        if weight is not None:
            model = self.baseLearner(ddata, weight)
        else:
            model = self.baseLearner(ddata)
        dcl = DiscretizedClassifier(classifier = model)
        # Expose the model's domain and name on the wrapper when available.
        if hasattr(model, "domain"):
            dcl.domain = model.domain
        if hasattr(model, "name"):
            dcl.name = model.name
        return dcl
125
class DiscretizedClassifier:
    """Thin wrapper that forwards calls to a stored ``classifier``.

    Every keyword argument given to the constructor becomes an instance
    attribute; calling the instance reads ``self.classifier``.
    """

    def __init__(self, **kwds):
        # Store all keyword arguments as attributes of this instance.
        vars(self).update(kwds)

    def __call__(self, example, resultType = orange.GetValue):
        """Return ``self.classifier(example, resultType)``."""
        model = self.classifier
        return model(example, resultType)
131
class DiscretizeTable(object):
    """Discretizes all continuous features of the data table.

    :param data: data to discretize. If given, the constructor returns the
        discretized data instead of a :class:`DiscretizeTable` instance.
    :type data: :class:`Orange.data.Table`

    :param features: data features to discretize. None (default) to discretize all features.
    :type features: list of :class:`Orange.data.variable.Variable`

    :param discretize_class: passed to the preprocessor as
        ``discretizeClass``. Defaults to False.
    :type discretize_class: bool

    :param method: feature discretization method.
    :type method: :class:`Discretization`

    When constructing without data, pass the options by keyword:
    ``__init__`` has no ``data`` parameter, so positional options would be
    shifted by one.
    """
    def __new__(cls, data=None, features=None, discretize_class=False, method=EqualFreq(n_intervals=3)):
        if data is None:
            # ``object.__new__`` must get only the class: extra arguments
            # are rejected when ``__new__`` is overridden. ``__init__``
            # receives the options from the normal instantiation protocol.
            return object.__new__(cls)
        # Data given: build a configured instance and apply it at once.
        # The result is not an instance of ``cls``, so Python does not
        # call ``__init__`` on it.
        self = cls(features=features, discretize_class=discretize_class, method=method)
        return self(data)

    def __init__(self, features=None, discretize_class=False, method=EqualFreq(n_intervals=3)):
        self.features = features
        self.discretize_class = discretize_class
        self.method = method

    def __call__(self, data):
        """Return ``data`` processed by a ``Preprocessor_discretize``
        configured with this object's features, class flag and method."""
        pp = Preprocessor_discretize(attributes=self.features, discretizeClass=self.discretize_class)
        pp.method = self.method
        return pp(data)
161
Note: See TracBrowser for help on using the repository browser.