# source:orange/Orange/testing/regression/tests_20/reference_discretization.py@9952:986e9cd806f4

Revision 9952:986e9cd806f4, 3.9 KB checked in by Miha Stajdohar <miha.stajdohar@…>, 2 years ago (diff)

Tests moved and renamed from orange 20.

Line
1# Description: Shows the usage of different classes for discretization, including manual discretization
2# Category:    discretization, categorization, preprocessing
3# Classes:     EntropyDiscretization, EquiDistDiscretization, BiModalDiscretization, Discretization, IntervalDiscretizer, Discretizer, BiModalDiscretizer
4# Uses:        iris
5# Referenced:  discretization.htm
6
7import orange
8
# NOTE(review): the leading number on every line is the line-number gutter of
# the repository-browser listing this file was captured from, not script text.
# Load the "iris" data set named in the "Uses" header field.
9data = orange.ExampleTable("iris")
10
11print "\nEntropy discretization, first 10 examples"
# Called with an attribute name and the data, EntropyDiscretization yields the
# discretized attribute itself (it is labelled "Discretized attribute" below).
12sep_w = orange.EntropyDiscretization("sepal width", data)
13
# Select the original sepal width, its discretized counterpart and the class
# variable, so both versions of the attribute are printed side by side.
14data2 = data.select([data.domain["sepal width"], sep_w, data.domain.classVar])
15for ex in data2[:10]:
16    print ex
17
# Per the printed labels: getValueFrom.whichVar is the continuous source
# attribute and getValueFrom.transformer.points are the cut-off points.
18print "\nDiscretized attribute:", sep_w
19print "Continuous attribute:", sep_w.getValueFrom.whichVar
20print "Cut-off points:", sep_w.getValueFrom.transformer.points
21
22
# Build a discretizer by hand from explicit cut-off points (3.0 and 5.0)
# instead of deriving the points from the data.
23print "\nManual construction of IntervalDiscretizer - single attribute"
24idisc = orange.IntervalDiscretizer(points = [3.0, 5.0])
# constructVariable derives a new attribute from the continuous "sepal length".
25sep_l = idisc.constructVariable(data.domain["sepal length"])
# Show the original attribute, the derived one and the class for 10 examples.
26data2 = data.select([data.domain["sepal length"], sep_l, data.domain.classVar])
27for ex in data2[:10]:
28    print ex
29
30
# Same hand-built discretizer (cut-offs 3.0 and 5.0), now applied to every
# attribute of the domain rather than to a single one.
31print "\nManual construction of IntervalDiscretizer - all attributes"
32idisc = orange.IntervalDiscretizer(points = [3.0, 5.0])
33newattrs = [idisc.constructVariable(attr) for attr in data.domain.attributes]
# Only the derived attributes plus the class variable are selected here.
34data2 = data.select(newattrs + [data.domain.classVar])
35for ex in data2[:10]:
36    print ex
37
38
# Discretize every attribute with EquiDistDiscretization, asking for six
# intervals; calling the discretization object with (attribute, data) returns
# the new attribute.
39print "\n\nEqual interval size discretization"
40disc = orange.EquiDistDiscretization(numberOfIntervals = 6)
41newattrs = [disc(attr, data) for attr in data.domain.attributes]
# NOTE(review): data2 is rebuilt here but not read again within this section.
42data2 = data.select(newattrs + [data.domain.classVar])
43
# First pass: name and value list of each new attribute.
44for attr in newattrs:
45    print "%s: %s" % (attr.name, attr.values)
46print
47
# Second pass: parameters of the transformer behind each attribute
# (firstCut, step) followed by its cut-off points, formatted to 3 decimals.
48for attr in newattrs:
49    print "%15s: first interval at %5.3f, step %5.3f" % (attr.name, attr.getValueFrom.transformer.firstCut, attr.getValueFrom.transformer.step)
50    print " "*17 + "cutoffs at " + ", ".join(["%5.3f" % x for x in attr.getValueFrom.transformer.points])
51print
52
53
54
# Discretize every attribute with EquiNDiscretization.
# NOTE(review): the heading says "Quartile" while numberOfIntervals is 6; the
# string is left untouched because it is part of the script's printed output.
55print "\n\nQuartile discretization"
56disc = orange.EquiNDiscretization(numberOfIntervals = 6)
57newattrs = [disc(attr, data) for attr in data.domain.attributes]
# NOTE(review): data2 is rebuilt here but not read again within this section.
58data2 = data.select(newattrs + [data.domain.classVar])
59
# First pass: name and value list of each new attribute.
60for attr in newattrs:
61    print "%s: %s" % (attr.name, attr.values)
62print
63
# Second pass: cut-off points of each attribute's transformer, 3 decimals.
64for attr in newattrs:
65    print " "*17 + "cutoffs at " + ", ".join(["%5.3f" % x for x in attr.getValueFrom.transformer.points])
66print
67
68
69
# Build an EquiDistDiscretizer by hand from fixed parameters (firstCut 2.0,
# step 1.0, five intervals) and apply it to every attribute.
70print "\nManual construction of EquiDistDiscretizer - all attributes"
71edisc = orange.EquiDistDiscretizer(firstCut = 2.0, step = 1.0, numberOfIntervals = 5)
72newattrs = [edisc.constructVariable(attr) for attr in data.domain.attributes]
# Print the first 10 examples described by the derived attributes and class.
73data2 = data.select(newattrs + [data.domain.classVar])
74for ex in data2[:10]:
75    print ex
76
77
# NOTE(review): the gutter jumps from 77 to 79, so original line 78 is absent
# from this listing (presumably the heading print for this section) - restore
# it from the repository before running.
# Entropy discretization of each attribute in turn, printing its cut-off
# points; the same `entro` object is reused later on the binary-class data.
79entro = orange.EntropyDiscretization()
80for attr in data.domain.attributes:
81    disc = entro(attr, data)
82    print "%s: %s" % (attr.name, disc.getValueFrom.transformer.points)
83print
84
85
# Derive a two-valued class "is versicolor" from the original class.
86newclass = orange.EnumVariable("is versicolor", values = ["no", "yes"])
# The value is computed per example by comparing its "iris" value with
# "Iris-versicolor"; presumably False/True map onto "no"/"yes" by index.
87newclass.getValueFrom = lambda ex, w: ex["iris"]=="Iris-versicolor"
# Same attributes as before, with the derived variable as the class.
88newdomain = orange.Domain(data.domain.attributes, newclass)
# Convert the iris examples into the new domain.
89data_v = orange.ExampleTable(newdomain, data)
90
# Bi-modal discretization on the binary-class data, run twice.
# First run: splitInTwo = 0, reporting the transformer's cut-off points.
# NOTE(review): both runs print the same heading; kept as-is because the
# strings are part of the script's output.
91print "\nBi-Modal discretization on binary problem"
92bimod = orange.BiModalDiscretization(splitInTwo = 0)
93for attr in data_v.domain.attributes:
94    disc = bimod(attr, data_v)
95    print "%s: %s" % (attr.name, disc.getValueFrom.transformer.points)
96print
97
# Second run: default settings; here the transformer exposes `low` and `high`,
# printed as the bounds of a single interval.
98print "\nBi-Modal discretization on binary problem"
99bimod = orange.BiModalDiscretization()
100for attr in data_v.domain.attributes:
101    disc = bimod(attr, data_v)
102    print "%s: (%5.3f, %5.3f]" % (attr.name, disc.getValueFrom.transformer.low, disc.getValueFrom.transformer.high)
103print
104
105
# Entropy discretization again, this time against the binary "is versicolor"
# class; reuses the `entro` object created earlier in the script.
106print "\nEntropy discretization on binary problem"
107for attr in data_v.domain.attributes:
108    disc = entro(attr, data_v)
109    print "%s: %s" % (attr.name, disc.getValueFrom.transformer.points)
Note: See TracBrowser for help on using the repository browser.