source: orange/orange/doc/Orange/rst/code/discretization.py @ 7770:e1dafa5e0037

Revision 7770:e1dafa5e0037, 4.1 KB checked in by markotoplak, 3 years ago (diff)

Orange 2.5. Discretization: added documentation from the reference.

Line 
# Description: Shows the usage of different classes for discretization, including manual discretization
# Category:    discretization, categorization, preprocessing
# Classes:     EntropyDiscretization, EquiDistDiscretization, EquiNDiscretization, BiModalDiscretization, Discretization, IntervalDiscretizer, EquiDistDiscretizer, Discretizer, BiModalDiscretizer
# Uses:        iris
# Referenced:  discretization.htm

7import Orange
8data = Orange.data.Table("iris")
9
10print "\nEntropy discretization, first 10 examples"
11sep_w = Orange.feature.discretization.EntropyDiscretization("sepal width", data)
12
13data2 = data.select([data.domain["sepal width"], sep_w, data.domain.class_var])
14for ex in data2[:10]:
15    print ex
16
17print "\nDiscretized attribute:", sep_w
18print "Continuous attribute:", sep_w.get_value_from.whichVar #FIXME not which_var
19print "Cut-off points:", sep_w.get_value_from.transformer.points
20
21print "\nManual construction of IntervalDiscretizer - single attribute"
22idisc = Orange.feature.discretization.IntervalDiscretizer(points = [3.0, 5.0])
23sep_l = idisc.construct_variable(data.domain["sepal length"])
24data2 = data.select([data.domain["sepal length"], sep_l, data.domain.classVar])
25for ex in data2[:10]:
26    print ex
27
28
29print "\nManual construction of IntervalDiscretizer - all attributes"
30idisc = Orange.feature.discretization.IntervalDiscretizer(points = [3.0, 5.0])
31newattrs = [idisc.construct_variable(attr) for attr in data.domain.attributes]
32data2 = data.select(newattrs + [data.domain.class_var])
33for ex in data2[:10]:
34    print ex
35
36
37print "\n\nEqual interval size discretization"
38disc = Orange.feature.discretization.EquiDistDiscretization(numberOfIntervals = 6)
39newattrs = [disc(attr, data) for attr in data.domain.attributes]
40data2 = data.select(newattrs + [data.domain.classVar])
41
42for attr in newattrs:
43    print "%s: %s" % (attr.name, attr.values)
44print
45
46for attr in newattrs:
47    print "%15s: first interval at %5.3f, step %5.3f" % (attr.name, attr.get_value_from.transformer.first_cut, attr.get_value_from.transformer.step)
48    print " "*17 + "cutoffs at " + ", ".join(["%5.3f" % x for x in attr.get_value_from.transformer.points])
49print
50
51
52
53print "\n\nQuartile discretization"
54disc = Orange.feature.discretization.EquiNDiscretization(numberOfIntervals = 6)
55newattrs = [disc(attr, data) for attr in data.domain.attributes]
56data2 = data.select(newattrs + [data.domain.classVar])
57
58for attr in newattrs:
59    print "%s: %s" % (attr.name, attr.values)
60print
61
62for attr in newattrs:
63    print " "*17 + "cutoffs at " + ", ".join(["%5.3f" % x for x in attr.get_value_from.transformer.points])
64print
65
66
67
68print "\nManual construction of EquiDistDiscretizer - all attributes"
69edisc = Orange.feature.discretization.EquiDistDiscretizer(first_cut = 2.0, step = 1.0, number_of_intervals = 5)
70newattrs = [edisc.constructVariable(attr) for attr in data.domain.attributes]
71data2 = data.select(newattrs + [data.domain.classVar])
72for ex in data2[:10]:
73    print ex
74
75
76print "\nFayyad-Irani discretization"
77entro = Orange.feature.discretization.EntropyDiscretization()
78for attr in data.domain.attributes:
79    disc = entro(attr, data)
80    print "%s: %s" % (attr.name, disc.get_value_from.transformer.points)
81print
82
83
# Build a two-valued class variable, "is versicolor", for the examples below.
newclass = Orange.data.variable.Discrete("is versicolor", values=["no", "yes"])
# Its value is computed from an instance's "iris" value; the second argument
# (`w`) is not used.
newclass.get_value_from = lambda ex, w: ex["iris"] == "Iris-versicolor"
# Same attributes as the original domain, with the new class variable.
newdomain = Orange.data.Domain(data.domain.attributes, newclass)
data_v = Orange.data.Table(newdomain, data)

89print "\nBi-Modal discretization on binary problem"
90bimod = Orange.feature.discretization.BiModalDiscretization(split_in_two = 0)
91for attr in data_v.domain.attributes:
92    disc = bimod(attr, data_v)
93    print "%s: %s" % (attr.name, disc.get_value_from.transformer.points)
94print
95
96print "\nBi-Modal discretization on binary problem"
97bimod = Orange.feature.discretization.BiModalDiscretization()
98for attr in data_v.domain.attributes:
99    disc = bimod(attr, data_v)
100    print "%s: (%5.3f, %5.3f]" % (attr.name, disc.get_value_from.transformer.low, disc.get_value_from.transformer.high)
101print
102
103
104print "\nEntropy discretization on binary problem"
105for attr in data_v.domain.attributes:
106    disc = entro(attr, data_v)
107    print "%s: %s" % (attr.name, disc.getValueFrom.transformer.points)
Note: See TracBrowser for help on using the repository browser.