source: orange/Orange/testing/regression/tests_20/reference_imputation.py @ 9952:986e9cd806f4

Revision 9952:986e9cd806f4, 4.0 KB checked in by Miha Stajdohar <miha.stajdohar@…>, 2 years ago (diff)

Tests moved and renamed from orange 20.

Line 
# Description: Shows the use of classes for imputation
# Category:    default classification accuracy, statistics
# Classes:     Imputer, Imputer_defaults, Imputer_asValue, Imputer_model, ImputerConstructor, ImputerConstructor_minimal, ImputerConstructor_maximal, ImputerConstructor_average, ImputerConstructor_asValue, ImputerConstructor_model
# Uses:        bridges
# Referenced:  imputation.htm
6
7import orange
8
9data = orange.ExampleTable("bridges")
10
11print "\n*** IMPUTING MINIMAL VALUES ***\n"
12
13imputer = orange.ImputerConstructor_minimal(data)
14
15print "Example w/ missing values"
16print data[19]
17print "Imputed:"
18print imputer(data[19])
19print
20
21impdata = imputer(data)
22for i in range(20, 25):
23    print data[i]
24    print impdata[i]
25    print
26
27
28print "\n*** IMPUTING MAXIMAL VALUES ***\n"
29
30imputer = orange.ImputerConstructor_maximal(data)
31
32print "Example w/ missing values"
33print data[19]
34print "Imputed:"
35print imputer(data[19])
36print
37
38impdata = imputer(data)
39for i in range(20, 25):
40    print data[i]
41    print impdata[i]
42    print
43
44
45print "\n*** IMPUTING AVERAGE/MAJORITY VALUES ***\n"
46
47imputer = orange.ImputerConstructor_average(data)
48
49print "Example w/ missing values"
50print data[19]
51print "Imputed:"
52print imputer(data[19])
53print
54
55impdata = imputer(data)
56for i in range(20, 25):
57    print data[i]
58    print impdata[i]
59    print
60
61
62print "\n*** MANUALLY CONSTRUCTED IMPUTER ***\n"
63
64imputer = orange.Imputer_defaults(data.domain)
65imputer.defaults["LENGTH"] = 1234
66
67print "Example w/ missing values"
68print data[19]
69print "Imputed:"
70print imputer(data[19])
71print
72
73impdata = imputer(data)
74for i in range(20, 25):
75    print data[i]
76    print impdata[i]
77    print
78
79
80print "\n*** TREE-BASED IMPUTATION ***\n"
81
82import orngTree
83imputer = orange.ImputerConstructor_model()
84imputer.learnerContinuous = imputer.learnerDiscrete = orngTree.TreeLearner(minSubset = 20)
85imputer = imputer(data)
86
87print "Example w/ missing values"
88print data[19]
89print "Imputed:"
90print imputer(data[19])
91print
92
93
94impdata = imputer(data)
95for i in range(20, 25):
96    print data[i]
97    print impdata[i]
98    print
99
100
101
102print "\n*** BAYES and AVERAGE IMPUTATION ***\n"
103
104imputer = orange.ImputerConstructor_model()
105imputer.learnerContinuous = orange.MajorityLearner()
106imputer.learnerDiscrete = orange.BayesLearner()
107imputer = imputer(data)
108
109print "Example w/ missing values"
110print data[19]
111print "Imputed:"
112print imputer(data[19])
113print
114
115
116impdata = imputer(data)
117for i in range(20, 25):
118    print data[i]
119    print impdata[i]
120    print
121
122
123print "\n*** CUSTOM IMPUTATION BY MODELS ***\n"
124
125imputer = orange.Imputer_model()
126imputer.models = [None] * len(data.domain)
127
128imputer.models[data.domain.index("LANES")] = orange.DefaultClassifier(2.0)
129
130tord = orange.DefaultClassifier(orange.Value(data.domain["T-OR-D"], "THROUGH"))
131imputer.models[data.domain.index("T-OR-D")] = tord
132
133import orngTree
134len_domain = orange.Domain(["MATERIAL", "SPAN", "ERECTED", "LENGTH"], data.domain)
135len_data = orange.ExampleTable(len_domain, data)
136len_tree = orngTree.TreeLearner(len_data, minSubset=20)
137imputer.models[data.domain.index("LENGTH")] = len_tree
138orngTree.printTxt(len_tree)
139
140spanVar = data.domain["SPAN"]
141def computeSpan(ex, rw):
142    if ex["TYPE"] == "WOOD" or ex["PURPOSE"] == "WALK":
143        return orange.Value(spanVar, "SHORT")
144    else:
145        return orange.Value(spanVar, "MEDIUM")
146
147imputer.models[data.domain.index("SPAN")] = computeSpan
148
149for i in range(20, 25):
150    print data[i]
151    print impdata[i]
152    print
153
154##for i in imputer(data):
155##    print i
156
157
158print "\n*** IMPUTATION WITH SPECIAL VALUES ***\n"
159
160imputer = orange.ImputerConstructor_asValue(data)
161
162original = data[19]
163imputed = imputer(data[19])
164
165print original.domain
166print
167print imputed.domain
168print
169
170for i in original.domain:
171    print "%s: %s -> %s" % (original.domain[i].name, original[i], imputed[i.name]),
172    if original.domain[i].varType == orange.VarTypes.Continuous:
173        print "(%s)" % imputed[i.name+"_def"]
174    else:
175        print
176print
177
178impdata = imputer(data)
179for i in range(20, 25):
180    print data[i]
181    print impdata[i]
182    print
Note: See TracBrowser for help on using the repository browser.