source: orange/Orange/data/discretization.py @ 11091:cc100f17a18c

Revision 11091:cc100f17a18c, 2.1 KB checked in by Ales Erjavec <ales.erjavec@…>, 15 months ago (diff)

Fixed DiscretizeTable with clean=True when called with a class-less domain.

Would fail in data.select call with TypeError. Also added a basic unittest.

Line 
1import Orange
2
3from Orange.core import\
4    EquiNDiscretization as EqualFreq,\
5    BiModalDiscretization as BiModal,\
6    Preprocessor_discretize
7
8
class DiscretizeTable(object):
    """Discretizes all continuous features of the data table.

    The class can be used in two ways. Called with ``data``, it discretizes
    the data immediately and returns the result. Called without ``data``,
    it returns a configured :class:`DiscretizeTable` instance that can be
    applied to data later by calling it.

    :param data: Data to discretize. If `None` (default), a
        :class:`DiscretizeTable` instance is returned instead of
        discretized data.
    :type data: :class:`Orange.data.Table`

    :param features: Data features to discretize. `None` (default) to
        discretize all features.
    :type features: list of :class:`Orange.feature.Descriptor`

    :param discretize_class: Forwarded unchanged to
        ``Preprocessor_discretize`` as its ``discretize_class`` argument
        (default: `False`).
    :type discretize_class: bool

    :param method: Feature discretization method. `None` (default) makes
        every instance create its own ``EqualFreq(n=3)`` discretizer.
    :type method: :class:`Orange.feature.discretization.Discretization`

    :param clean: Clean the data domain after discretization. If `True`,
        features discretized to a constant will be removed. Useful only
        for discretizers which infer number of discretization intervals
        from data, like :class:`Orange.feature.discretization.Entropy`
        (default: `True`).
    :type clean: bool

    .. note::
        When constructing an instance without data, pass the settings by
        keyword. Positional arguments are handed to ``__init__``
        unchanged, and its first positional parameter is ``features``,
        not ``data``.

    """
    def __new__(cls, data=None, features=None, discretize_class=False,
                method=None, clean=True):
        if data is None:
            # No data: hand back a bare instance; Python then runs
            # __init__ on it with the same call arguments.
            return object.__new__(cls)

        # Data given: build a configured instance and apply it right away.
        # The value returned is the discretized data rather than the
        # instance itself.
        instance = cls(features=features, discretize_class=discretize_class,
                       method=method, clean=clean)
        return instance(data)

    def __init__(self, features=None, discretize_class=False,
                 method=None, clean=True):
        if method is None:
            # Build the default here rather than in the signature, so that
            # instances do not share (and cannot accidentally reconfigure)
            # one discretizer object created at class-definition time.
            method = EqualFreq(n=3)
        self.features = features
        self.discretize_class = discretize_class
        self.method = method
        self.clean = clean

    def __call__(self, data):
        """Discretize ``data`` using the stored settings.

        :param data: Data to discretize.
        :type data: :class:`Orange.data.Table`

        :returns: The preprocessor output; if ``self.clean`` is true, a new
            table whose domain keeps only features with more than one
            value.
        """
        pp = Preprocessor_discretize(attributes=self.features,
                                     discretize_class=self.discretize_class)
        pp.method = self.method
        ddata = pp(data)

        if not self.clean:
            return ddata

        # Keep only features that still distinguish something, i.e. have
        # more than one value after discretization.
        features = [x for x in ddata.domain.features if len(x.values) > 1]
        # class_var is passed explicitly so that a class-less domain
        # (class_var is None) is rebuilt without a class as well.
        domain = Orange.data.Domain(features, ddata.domain.class_var,
                                    class_vars=ddata.domain.class_vars)
        return Orange.data.Table(domain, ddata)
Note: See TracBrowser for help on using the repository browser.