source: orange/Orange/feature/discretization.py @ 9900:795a819ca3bb

Revision 9900:795a819ca3bb, 5.8 KB checked in by blaz <blaz.zupan@…>, 2 years ago (diff)

preliminary discretization structure (draft)

Line 
1import Orange
2import Orange.core as orange
3
4from Orange.core import \
5    Discrete2Continuous, \
6    Discretizer, \
7    BiModalDiscretizer, \
8    EquiDistDiscretizer as EqualWidthDiscretizer, \
9    IntervalDiscretizer, \
10    ThresholdDiscretizer,\
11    EntropyDiscretization as Entropy, \
12    EquiDistDiscretization as EqualWidth, \
13    EquiNDiscretization as EqualFreq, \
14    BiModalDiscretization as BiModal, \
15    Discretization, \
16    Preprocessor_discretize
17
def entropyDiscretization_wrapper(data):
    """Discretize all continuous features in a class-labeled data set with
    the entropy-based discretization :obj:`Entropy`.

    :param data: data to discretize.
    :type data: Orange.data.Table
    :rtype: :obj:`Orange.data.Table` that includes the categorical and
        discretized continuous features from the original data table.

    After discretization, features that ended up with at most one value
    (i.e. were reduced to a single interval / a constant) are left out of
    the returned table. The class variable is appended last to the
    selection.

    """
    # Reset Orange's random seed before discretizing (presumably so that
    # repeated calls give reproducible results).
    orange.setrandseed(0)
    data_new = orange.Preprocessor_discretize(data, method=Entropy())

    # Keep only the features that still distinguish at least two values.
    attrlist = [attr for attr in data_new.domain.attributes
                if len(attr.values) > 1]
    attrlist.append(data_new.domain.classVar)
    return data_new.select(attrlist)
44
45
class EntropyDiscretization_wrapper:
    """Simple callable wrapper around the function
    :obj:`entropyDiscretization_wrapper`.

    An instance is created without any data and can then be called with a
    data set to obtain its discretized version::

        discretizer = EntropyDiscretization_wrapper()
        disc_data = discretizer(table)

    """
    def __call__(self, data):
        """Return ``data`` discretized by :obj:`entropyDiscretization_wrapper`.

        :param data: data to discretize.
        :type data: Orange.data.Table
        """
        # The module-level function is named ``entropyDiscretization_wrapper``;
        # no ``entropyDiscretization`` is defined or imported in this module.
        return entropyDiscretization_wrapper(data)
64
def DiscretizedLearner(baseLearner, examples=None, weight=0, **kwds):
    """Build a :obj:`DiscretizedLearner_Class` and optionally train it.

    :param baseLearner: learner that will receive the discretized data.
    :param examples: if given (and truthy), the learner is immediately
        called on these examples and the resulting classifier is returned.
    :param weight: passed as the second argument to the learner when
        ``examples`` is given.
    :param kwds: extra keyword arguments forwarded to
        :obj:`DiscretizedLearner_Class` (e.g. ``discretizer``, ``name``).
    :return: the learner itself when no examples are given, otherwise the
        result of calling the learner on ``examples``.
    """
    # Direct call with keyword unpacking replaces the deprecated apply().
    learner = DiscretizedLearner_Class(baseLearner, **kwds)
    if examples:
        return learner(examples, weight)
    return learner
69
class DiscretizedLearner_Class:
    """A learner wrapper that discretizes the data before passing it to the
    wrapped learner. This makes it possible to prepare a learner object
    without giving it the data and, for instance, use it in some standard
    testing procedure that repeats learning/testing on several data samples.

    :param baseLearner: learner to which the discretized data is given
    :type baseLearner: Orange.classification.Learner

    :param discretizer: a discretization method that converts continuous
      values into discrete ones; it is passed as ``method`` to
      ``Preprocessor_discretize``. When ``None`` (default), a new
      :obj:`Entropy` instance is created for this learner.
    :type discretizer: Orange.feature.discretization.Discretization

    :param name: name to assign to the learner (any extra keyword argument
      is stored as an attribute of the learner)
    :type name: string

    Calling the learner with a data table (and an optional weight)
    discretizes the table, trains ``baseLearner`` on it and returns a
    :obj:`DiscretizedClassifier` wrapping the trained model.

    An example of how such a learner is set up and used in cross validation
    is given below::

        from Orange.feature import discretization
        bayes = Orange.classification.bayes.NaiveBayesLearner()
        disc = discretization.EqualFreq(numberOfIntervals=10)
        dBayes = discretization.DiscretizedLearner(bayes, name='disc bayes')
        dbayes2 = discretization.DiscretizedLearner(bayes, name="EquiNBayes", discretizer=disc)
        results = Orange.evaluation.testing.CrossValidation([dBayes], table)
        classifier = discretization.DiscretizedLearner(bayes, examples=table)

    """
    def __init__(self, baseLearner, discretizer=None, **kwds):
        self.baseLearner = baseLearner
        # Inherit the base learner's name; an explicit ``name`` keyword
        # overrides it through the ``kwds`` update below.
        if hasattr(baseLearner, "name"):
            self.name = baseLearner.name
        # Create the default per instance instead of sharing one object
        # built at function-definition time across all learners.
        if discretizer is None:
            discretizer = Entropy()
        self.discretizer = discretizer
        self.__dict__.update(kwds)

    def __call__(self, data, weight=None):
        """Discretize ``data``, train the base learner and wrap the model.

        :param data: data to discretize and learn from.
        :param weight: optional weight argument; forwarded to the base
            learner only when it is not ``None``.
        :return: a :obj:`DiscretizedClassifier` around the trained model.
        """
        # filter the data and then learn
        # NOTE(review): this local import shadows the module-level
        # Preprocessor_discretize imported from Orange.core; kept as in the
        # original - confirm whether both refer to the same object.
        from Orange.preprocess import Preprocessor_discretize
        ddata = Preprocessor_discretize(data, method=self.discretizer)
        if weight is not None:
            model = self.baseLearner(ddata, weight)
        else:
            model = self.baseLearner(ddata)
        dcl = DiscretizedClassifier(classifier=model)
        # Expose the model's domain and name on the wrapper when present.
        if hasattr(model, "domain"):
            dcl.domain = model.domain
        if hasattr(model, "name"):
            dcl.name = model.name
        return dcl
123
class DiscretizedClassifier:
    """Wrapper that forwards classification calls to a stored classifier.

    All keyword arguments given to the constructor are stored as instance
    attributes; ``__call__`` reads the ``classifier`` attribute, so it is
    expected to be supplied that way.
    """
    def __init__(self, **kwds):
        # Store every keyword argument as an attribute of the instance.
        vars(self).update(kwds)

    def __call__(self, example, resultType=orange.GetValue):
        # Delegate to the wrapped classifier with the same arguments.
        wrapped = self.classifier
        return wrapped(example, resultType)
129
class DiscretizeTable(object):
    """Discretizes all continuous features of the data table.

    When constructed without data, the result is a reusable discretizer
    object that can be called with a data table. When constructed with
    data, the discretized table is returned directly instead of a
    :class:`DiscretizeTable` instance.

    :param data: data to discretize.
    :type data: :class:`Orange.data.Table`

    :param features: data features to discretize. None (default) to discretize all features.
    :type features: list of :class:`Orange.data.variable.Variable`

    :param discretize_class: passed to ``Preprocessor_discretize`` as
        ``discretizeClass``. Defaults to False.
    :type discretize_class: bool

    :param method: feature discretization method. None (default) means a
        new ``EqualFreq(n=3)`` is created for this instance.
    :type method: :class:`Discretization`
    """
    def __new__(cls, data=None, features=None, discretize_class=False, method=None):
        if data is None:
            return object.__new__(cls)
        # Data supplied: build a configured instance and apply it right
        # away. The returned table is not a ``cls`` instance, so Python
        # does not run ``__init__`` on it.
        discretizer = cls(features=features, discretize_class=discretize_class, method=method)
        return discretizer(data)

    def __init__(self, data=None, features=None, discretize_class=False, method=None):
        # ``__init__`` receives exactly the arguments given to ``__new__``,
        # so both signatures must match; ``data`` is accepted here only for
        # that reason and is not stored.
        self.features = features
        self.discretize_class = discretize_class
        # Create the default per instance rather than sharing one object
        # built at function-definition time.
        self.method = EqualFreq(n=3) if method is None else method

    def __call__(self, data):
        """Return ``data`` processed by a ``Preprocessor_discretize``
        configured with this object's features, class flag and method."""
        pp = Preprocessor_discretize(attributes=self.features, discretizeClass=self.discretize_class)
        pp.method = self.method
        return pp(data)
159
Note: See TracBrowser for help on using the repository browser.