source: orange/orange/Orange/feature/discretization.py @ 7660:5f432be7cddc

Revision 7660:5f432be7cddc, 6.5 KB checked in by janezd <janez.demsar@…>, 3 years ago (diff)

Renamed Orange.data.feature to Orange.data.variable

Line 
1"""
2
3.. index:: discretization
4
5.. index::
6   single: feature; discretization
7
8This module implements some functions and classes that can be used for
9categorization of continuous features. Besides several general classes that
10can help in this task, we also provide a function that may help in
11entropy-based discretization (Fayyad & Irani), and a wrapper around classes for
12categorization that can be used for learning.
13
14.. class:: Orange.feature.discretization.EntropyDiscretization
15   
16    Discretize the given feature and return a discretized feature. The new
17    attribute's values are computed automatically when they are needed.
18   
19    :param attribute: continuous feature to discretize
20    :type attribute: :obj:`Orange.data.variable.Variable`
21    :param examples: data to discretize
22    :type examples: :obj:`Orange.data.Table`
23    :param weight: meta feature that stores weights of individual data
24          instances
25    :type weight: Orange.data.variable.Variable
26    :rtype: :obj:`Orange.data.variable.Discrete`
27       
28.. autofunction:: Orange.feature.discretization.entropyDiscretization_wrapper
29
30.. autoclass:: Orange.feature.discretization.EntropyDiscretization_wrapper
31
32.. autoclass:: Orange.feature.discretization.DiscretizedLearner_Class
33
34.. rubric:: Example
35
36A chapter on `feature subset selection <../ofb/o_fss.htm>`_ in the Orange
37for Beginners tutorial shows the use of DiscretizedLearner. Other
38discretization classes from core Orange are listed in the chapter on
39`categorization <../ofb/o_categorization.htm>`_ of the same tutorial.
40
41.. note::
42    add from reference http://orange.biolab.si/doc/reference/discretization.htm
43
44==========
45References
46==========
47
48* UM Fayyad and KB Irani. Multi-interval discretization of continuous valued
49  attributes for classification learning. In Proceedings of the 13th
50  International Joint Conference on Artificial Intelligence, pages
51  1022--1029, Chambery, France, 1993.
52
53"""
54
55import Orange.core as orange
56
57from Orange.core import \
58    Discrete2Continuous, \
59    Discretizer, \
60        BiModalDiscretizer, \
61        EquiDistDiscretizer, \
62        IntervalDiscretizer, \
63        ThresholdDiscretizer, \
64        EntropyDiscretization
65
66######
67# from orngDics.py
def entropyDiscretization_wrapper(table):
    """Take the classified data set (table) and categorize all continuous
    features using the entropy based discretization
    :obj:`EntropyDiscretization`.

    After categorization, features that were categorized to a single interval
    (to a constant value) are removed from the table. The class variable is
    always kept and is placed last.

    :param table: data to discretize.
    :type table: Orange.data.Table
    :rtype: :obj:`Orange.data.Table` that includes every feature of the
        discretized table that has more than one value, followed by the
        class variable.

    """
    # NOTE: this resets Orange's global random seed on every call
    # (process-wide side effect); presumably done for reproducibility.
    orange.setrandseed(0)
    discretized = orange.Preprocessor_discretize(
        table, method=EntropyDiscretization())

    # Keep only the features that still distinguish between instances, i.e.
    # those that were not collapsed into a single value.
    kept = [attr for attr in discretized.domain.attributes
            if len(attr.values) > 1]
    kept.append(discretized.domain.classVar)
    return discretized.select(kept)
95
96
class EntropyDiscretization_wrapper:
    """This is a simple callable wrapper class around the function
    :obj:`entropyDiscretization_wrapper`.

    An instance is created without data and can then be passed a data set
    for discretization::

        discretizer = Orange.feature.discretization.EntropyDiscretization_wrapper()
        disc_data = discretizer(table)

    """
    def __call__(self, data):
        """Return ``data`` discretized by
        :obj:`entropyDiscretization_wrapper`.

        :param data: data to discretize.
        :type data: Orange.data.Table
        """
        # Delegate to the module-level function; the name
        # ``entropyDiscretization`` is not defined in this module.
        return entropyDiscretization_wrapper(data)
115
def DiscretizedLearner(baseLearner, examples=None, weight=0, **kwds):
    """Create a :obj:`DiscretizedLearner_Class` and optionally train it.

    :param baseLearner: learner that is given the discretized data
    :param examples: optional data; when given (and truthy) the new learner
        is called on it right away
    :param weight: passed to the learner together with ``examples``
    :param kwds: forwarded unchanged to the :obj:`DiscretizedLearner_Class`
        constructor
    :return: the learner when no ``examples`` are given, otherwise the result
        of calling the learner on ``examples`` and ``weight``
    """
    # Direct call with keyword unpacking replaces the deprecated ``apply``.
    learner = DiscretizedLearner_Class(baseLearner, **kwds)
    if examples:
        return learner(examples, weight)
    return learner
120
class DiscretizedLearner_Class:
    """This class allows to set a learner object, such that before learning
    the data passed to the learner is discretized. In this way we can prepare
    an object that learns without giving it the data, and, for instance, use
    it in some standard testing procedure that repeats learning/testing on
    several data samples.

    :param baseLearner: learner to which the discretized data is given
    :type baseLearner: Orange.classification.Learner

    :param discretizer: a discretizer that converts continuous values into
      discrete; it is used as the ``method`` argument of
      ``Preprocessor_discretize``. Defaults to a new
      :obj:`Orange.feature.discretization.EntropyDiscretization`.
    :type discretizer: Orange.feature.discretization.Discretization

    :param name: name to assign to learner (any further keyword arguments
      are likewise stored as attributes of the learner)
    :type name: string

    An example on how such learner is set and used in ten-fold cross validation
    is given below::

        from Orange.feature import discretization
        bayes = Orange.classification.bayes.NaiveBayesLearner()
        disc = orange.Preprocessor_discretize(method=discretization.EquiNDiscretization(numberOfIntervals=10))
        dBayes = discretization.DiscretizedLearner(bayes, name='disc bayes')
        dbayes2 = discretization.DiscretizedLearner(bayes, name="EquiNBayes", discretizer=disc)
        results = Orange.evaluation.testing.CrossValidation([dBayes], table)
        classifier = discretization.DiscretizedLearner(bayes, examples=table)

    """
    # NOTE(review): the example above passes a Preprocessor_discretize
    # instance as ``discretizer``, while __call__ uses ``discretizer`` as the
    # ``method`` of Preprocessor_discretize -- confirm the example is correct.

    def __init__(self, baseLearner, discretizer=None, **kwds):
        self.baseLearner = baseLearner
        # Inherit the base learner's name; an explicit ``name`` keyword
        # overrides it through the ``kwds`` update below.
        if hasattr(baseLearner, "name"):
            self.name = baseLearner.name
        # Create the default discretizer per instance rather than once at
        # definition time, so learners do not share one discretizer object.
        if discretizer is None:
            discretizer = EntropyDiscretization()
        self.discretizer = discretizer
        self.__dict__.update(kwds)

    def __call__(self, data, weight=None):
        """Discretize ``data`` and train the base learner on the result.

        :param data: data whose continuous features need to be discretized
        :type data: Orange.data.Table
        :param weight: optional weight passed on to the base learner; when it
            is ``None`` the base learner is called with the data only
        :return: a :obj:`DiscretizedClassifier` wrapping the trained model
        """
        # filter the data and then learn
        from Orange.preprocess import Preprocessor_discretize
        ddata = Preprocessor_discretize(data, method=self.discretizer)
        if weight is not None:
            model = self.baseLearner(ddata, weight)
        else:
            model = self.baseLearner(ddata)
        dcl = DiscretizedClassifier(classifier=model)
        # Mirror the model's domain and name on the wrapper when available.
        if hasattr(model, "domain"):
            dcl.domain = model.domain
        if hasattr(model, "name"):
            dcl.name = model.name
        return dcl
174
class DiscretizedClassifier:
    """Thin wrapper that forwards classification calls to the object stored
    in its ``classifier`` attribute.
    """

    def __init__(self, **kwds):
        # Every keyword argument becomes an attribute of the instance.
        vars(self).update(kwds)

    def __call__(self, example, resultType = orange.GetValue):
        """Return the wrapped classifier's result for ``example``."""
        wrapped = self.classifier
        return wrapped(example, resultType)
Note: See TracBrowser for help on using the repository browser.