source: orange/Orange/data/discretization.py @ 11091:cc100f17a18c

Revision 11091:cc100f17a18c, 2.1 KB checked in by Ales Erjavec <ales.erjavec@…>, 15 months ago (diff)

Fixed DiscretizeTable with clean=True when called with a class less domain.

Would fail in data.select call with TypeError. Also added a basic unittest.

RevLine 
[9943]1import Orange
2
3from Orange.core import\
4    EquiNDiscretization as EqualFreq,\
5    BiModalDiscretization as BiModal,\
6    Preprocessor_discretize
7
[11091]8
class DiscretizeTable(object):
    """Discretizes all continuous features of the data table.

    Calling the class with `data` discretizes that table immediately and
    returns the result of the discretization. Calling it without `data`
    returns a configured instance which is later applied to a table by
    calling it.

    .. note::
        The first positional argument is always interpreted as `data`, so
        pass all other options by keyword. ``DiscretizeTable(None, feats)``
        would store ``feats`` as `discretize_class`, not as `features`.

    :param data: Data to discretize.
    :type data: :class:`Orange.data.Table`

    :param features: Data features to discretize. `None` (default) to
        discretize all features.
    :type features: list of :class:`Orange.feature.Descriptor`

    :param discretize_class: Forwarded unchanged to the underlying
        ``Preprocessor_discretize`` (default: `False`). Presumably it
        selects whether a continuous class variable is discretized too;
        confirm against the preprocessor's documentation.
    :type discretize_class: bool

    :param method: Feature discretization method. `None` (default) makes
        every instance create its own ``EqualFreq(n=3)``.
    :type method: :class:`Orange.feature.discretization.Discretization`

    :param clean: Clean the data domain after discretization. If `True`,
        features discretized to a constant will be removed. Useful only
        for discretizers which infer number of discretization intervals
        from data, like :class:`Orange.feature.discretization.Entropy`
        (default: `True`).
    :type clean: bool

    """
    def __new__(cls, data=None, features=None, discretize_class=False,
                method=None, clean=True):
        if data is None:
            # No data given: return a bare instance. Python then runs
            # __init__ on it with the same arguments.
            return object.__new__(cls)

        # Data given: build a configured instance and apply it right away.
        # The value returned by __call__ is not an instance of `cls`, so
        # Python does not run __init__ on it.
        discretizer = cls(features=features,
                          discretize_class=discretize_class,
                          method=method, clean=clean)
        return discretizer(data)

    def __init__(self, features=None, discretize_class=False,
                 method=None, clean=True):
        self.features = features
        self.discretize_class = discretize_class
        # The default discretizer is created per instance. A default
        # evaluated in the signature would be one object shared by every
        # instance, so changing it on one would affect all the others.
        self.method = EqualFreq(n=3) if method is None else method
        self.clean = clean

    @staticmethod
    def _is_informative(feature):
        """Return `True` unless `feature` has fewer than two values.

        Features without a `values` attribute are kept. NOTE(review):
        presumably these are continuous features that were not selected
        for discretization; previously ``len(feature.values)`` was
        evaluated unconditionally for them.
        """
        values = getattr(feature, "values", None)
        return values is None or len(values) > 1

    def __call__(self, data):
        """Discretize `data` and return the resulting table.

        With `clean` set, the result is converted to a domain from which
        features with fewer than two values have been removed; the class
        variable and `class_vars` are carried over unchanged.
        """
        pp = Preprocessor_discretize(attributes=self.features,
                                     discretize_class=self.discretize_class)
        pp.method = self.method
        ddata = pp(data)

        if not self.clean:
            return ddata

        features = [x for x in ddata.domain.features
                    if self._is_informative(x)]
        # `class_var` is passed through as is, which also covers domains
        # without a class variable.
        domain = Orange.data.Domain(features, ddata.domain.class_var,
                                    class_vars=ddata.domain.class_vars)
        return Orange.data.Table(domain, ddata)
Note: See TracBrowser for help on using the repository browser.