Ignore:
Timestamp:
02/05/13 18:37:24 (15 months ago)
Author:
Ales Erjavec <ales.erjavec@…>
Branch:
default
Message:

Fixed DiscretizeTable with clean=True when called with a class-less domain.

Previously, the data.select call would fail with a TypeError. Also added a basic unit test.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • Orange/data/discretization.py

    r10046 r11091  
    66    Preprocessor_discretize 
    77 
     8 
    89class DiscretizeTable(object): 
    910    """Discretizes all continuous features of the data table. 
    1011 
    11     :param data: data to discretize. 
     12    :param data: Data to discretize. 
    1213    :type data: :class:`Orange.data.Table` 
    1314 
    14     :param features: data features to discretize. None (default) to discretize all features. 
     15    :param features: Data features to discretize. `None` (default) to 
     16        discretize all features. 
    1517    :type features: list of :class:`Orange.feature.Descriptor` 
    1618 
    17     :param method: feature discretization method. 
    18     :type method: :class:`Discretization` 
     19    :param method: Feature discretization method. 
     20    :type method: :class:`Orange.feature.discretization.Discretization` 
    1921 
    20     :param clean: clean the data domain after discretization. If True, features discretized to a constant will be 
    21       removed. Useful only for discretizers which infer number of discretization intervals from data, 
    22       like :class:`Orange.feature.discretize.Entropy` (default: True). 
    23     :type clean: boolean 
     22    :param clean: Clean the data domain after discretization. If `True`, 
     23        features discretized to a constant will be removed. Useful only 
     24        for discretizers which infer number of discretization intervals 
     25        from data, like :class:`Orange.feature.discretize.Entropy` 
     26        (default: `True`). 
     27    :type clean: bool 
    2428 
    2529    """ 
    26     def __new__(cls, data=None, features=None, discretize_class=False, method=EqualFreq(n=3), clean=True): 
     30    def __new__(cls, data=None, features=None, discretize_class=False, 
     31                method=EqualFreq(n=3), clean=True): 
    2732        if data is None: 
    2833            self = object.__new__(cls) 
    2934            return self 
    3035        else: 
    31             self = cls(features=features, discretize_class=discretize_class, method=method, clean=clean) 
     36            self = cls(features=features, discretize_class=discretize_class, 
     37                       method=method, clean=clean) 
    3238            return self(data) 
    3339 
    34     def __init__(self, features=None, discretize_class=False, method=EqualFreq(n=3), clean=True): 
     40    def __init__(self, features=None, discretize_class=False, 
     41                 method=EqualFreq(n=3), clean=True): 
    3542        self.features = features 
    3643        self.discretize_class = discretize_class 
     
    3946 
    4047    def __call__(self, data): 
    41         pp = Preprocessor_discretize(attributes=self.features, discretizeClass=self.discretize_class) 
     48        pp = Preprocessor_discretize(attributes=self.features, 
     49                                     discretize_class=self.discretize_class) 
    4250        pp.method = self.method 
    4351        ddata = pp(data) 
    4452 
    4553        if self.clean: 
    46             return ddata.select([x for x in ddata.domain.features if len(x.values)>1] + [ddata.domain.classVar]) 
     54            features = [x for x in ddata.domain.features if len(x.values) > 1] 
     55            domain = Orange.data.Domain(features, ddata.domain.class_var, 
     56                                        class_vars=ddata.domain.class_vars) 
     57            return Orange.data.Table(domain, ddata) 
    4758        else: 
    4859            return ddata 
Note: See TracChangeset for help on using the changeset viewer.