source: orange/docs/reference/rst/code/imputation-complex.py @ 9638:eb3af38491f2

Revision 9638:eb3af38491f2, 4.5 KB checked in by umek@…, 2 years ago (diff)

Renamed the variable "table" to "data" or to the name of the data set.

For example - instead of

table = Orange.data.Table("housing")

is changed to

housing = Orange.data.Table("housing")

Line 
1# Description: Shows the use of classes for imputation
2# Category:    imputation
3# Uses:        bridges
4# Referenced:  Orange.feature.html#imputation
5# Classes:     Orange.feature.imputation.Imputer, Orange.feature.imputation.Imputer_defaults, Orange.feature.imputation.Imputer_asValue, Orange.feature.imputation.Imputer_model, Orange.feature.imputation.ImputerConstructor, Orange.feature.imputation.ImputerConstructor_minimal, Orange.feature.imputation.ImputerConstructor_maximal, Orange.feature.imputation.ImputerConstructor_average, Orange.feature.imputation.ImputerConstructor_asValue, Orange.feature.imputation.ImputerConstructor_model
6
7import Orange
8
# Load the "bridges" data set; every section of this script works on it.
bridges = Orange.data.Table("bridges")

# Section: imputer obtained from ImputerConstructor_minimal.
print("*** IMPUTING MINIMAL VALUES ***")
imputer = Orange.feature.imputation.ImputerConstructor_minimal(bridges)

# Show a single instance before and after it goes through the imputer.
example = bridges[19]
print("Example w/ missing values")
print(example)
print("Imputed:")
print(imputer(example))
print("")

# Run the imputer over the whole table and print original/imputed pairs
# for instances 20..24.
impdata = imputer(bridges)
for idx in range(20, 25):
    print(bridges[idx])
    print(impdata[idx])
    print("")
24
25
# Section: imputer obtained from ImputerConstructor_maximal.
print("*** IMPUTING MAXIMAL VALUES ***")
imputer = Orange.feature.imputation.ImputerConstructor_maximal(bridges)

# Show a single instance before and after it goes through the imputer.
example = bridges[19]
print("Example w/ missing values")
print(example)
print("Imputed:")
print(imputer(example))
print("")

# Run the imputer over the whole table and print original/imputed pairs
# for instances 20..24.
impdata = imputer(bridges)
for idx in range(20, 25):
    print(bridges[idx])
    print(impdata[idx])
    print("")
39
40
# Section: imputer obtained from ImputerConstructor_average.
print("*** IMPUTING AVERAGE/MAJORITY VALUES ***")
imputer = Orange.feature.imputation.ImputerConstructor_average(bridges)

# Show a single instance before and after it goes through the imputer.
example = bridges[19]
print("Example w/ missing values")
print(example)
print("Imputed:")
print(imputer(example))
print("")

# Run the imputer over the whole table and print original/imputed pairs
# for instances 20..24.
impdata = imputer(bridges)
for idx in range(20, 25):
    print(bridges[idx])
    print(impdata[idx])
    print("")
54
55
# Section: an Imputer_defaults built directly from the domain, with the
# default for LENGTH set by hand to 1234.
print("*** MANUALLY CONSTRUCTED IMPUTER ***")
imputer = Orange.feature.imputation.Imputer_defaults(bridges.domain)
imputer.defaults["LENGTH"] = 1234

# Show a single instance before and after it goes through the imputer.
example = bridges[19]
print("Example w/ missing values")
print(example)
print("Imputed:")
print(imputer(example))
print("")

# Run the imputer over the whole table and print original/imputed pairs
# for instances 20..24.
impdata = imputer(bridges)
for idx in range(20, 25):
    print(bridges[idx])
    print(impdata[idx])
    print("")
70
71
# Section: model-based imputation where one tree learner is used for both
# continuous and discrete features.
print("*** TREE-BASED IMPUTATION ***")

constructor = Orange.feature.imputation.ImputerConstructor_model()
tree_learner = Orange.classification.tree.TreeLearner(minSubset=20)
# The same learner object is shared by both slots.
constructor.learner_continuous = tree_learner
constructor.learner_discrete = tree_learner
# Calling the configured constructor on the data yields the imputer.
imputer = constructor(bridges)

# Show a single instance before and after it goes through the imputer.
example = bridges[19]
print("Example w/ missing values")
print(example)
print("Imputed:")
print(imputer(example))
print("")

# Run the imputer over the whole table and print original/imputed pairs
# for instances 20..24.
impdata = imputer(bridges)
for idx in range(20, 25):
    print(bridges[idx])
    print(impdata[idx])
    print("")
88
89
# Section: model-based imputation with a MeanLearner for continuous
# features and a NaiveLearner for discrete ones.
print("*** BAYES and AVERAGE IMPUTATION ***")
constructor = Orange.feature.imputation.ImputerConstructor_model()
constructor.learner_continuous = Orange.regression.mean.MeanLearner()
constructor.learner_discrete = Orange.classification.bayes.NaiveLearner()
# Calling the configured constructor on the data yields the imputer.
imputer = constructor(bridges)

# Show a single instance before and after it goes through the imputer.
example = bridges[19]
print("Example w/ missing values")
print(example)
print("Imputed:")
print(imputer(example))
print("")

# Run the imputer over the whole table and print original/imputed pairs
# for instances 20..24.
impdata = imputer(bridges)
for idx in range(20, 25):
    print(bridges[idx])
    print(impdata[idx])
    print("")
105
106
# Section: an Imputer_model whose per-feature models are assigned by hand.
print("*** CUSTOM IMPUTATION BY MODELS ***")
imputer = Orange.feature.imputation.Imputer_model()
# One slot per feature in the domain; slots that are never assigned stay None.
imputer.models = [None] * len(bridges.domain)

# LANES and T-OR-D each get a ConstantClassifier (2.0 and "THROUGH").
imputer.models[bridges.domain.index("LANES")] = Orange.classification.ConstantClassifier(2.0)
tord = Orange.classification.ConstantClassifier(
    Orange.data.Value(bridges.domain["T-OR-D"], "THROUGH"))
imputer.models[bridges.domain.index("T-OR-D")] = tord

# LENGTH gets a tree trained on the data projected onto four features.
# NOTE(review): presumably LENGTH, listed last, acts as the class of
# len_domain -- confirm against the Orange.data.Domain documentation.
len_domain = Orange.data.Domain(["MATERIAL", "SPAN", "ERECTED", "LENGTH"], bridges.domain)
len_data = Orange.data.Table(len_domain, bridges)
len_tree = Orange.classification.tree.TreeLearner(len_data, minSubset=20)
imputer.models[bridges.domain.index("LENGTH")] = len_tree
print(len_tree)

span_var = bridges.domain["SPAN"]


def compute_span(ex, rw):
    """Return a SPAN value for ``ex`` chosen by a fixed rule.

    The result is "SHORT" when the instance's TYPE is "WOOD" or its
    PURPOSE is "WALK", and "MEDIUM" otherwise.

    ``rw`` is accepted but not used by this function.
    """
    # Fixed: the original called ``orange.Value``, but only ``Orange`` is
    # imported in this script, so the call raised NameError.
    if ex["TYPE"] == "WOOD" or ex["PURPOSE"] == "WALK":
        return Orange.data.Value(span_var, "SHORT")
    return Orange.data.Value(span_var, "MEDIUM")


# SPAN is imputed by the plain Python function above.
imputer.models[bridges.domain.index("SPAN")] = compute_span

# Fixed: apply the hand-built imputer before printing. Without this line the
# loop showed a stale ``impdata`` left over from earlier in the script.
impdata = imputer(bridges)
for i in range(20, 25):
    print(bridges[i])
    print(impdata[i])
    print("")
134
135
# Section: imputer obtained from ImputerConstructor_asValue. The domains of
# the original and the imputed instance are printed for comparison.
print("*** IMPUTATION WITH SPECIAL VALUES ***")
imputer = Orange.feature.imputation.ImputerConstructor_asValue(bridges)
original = bridges[19]
imputed = imputer(bridges[19])
print(original.domain)
print("")
print(imputed.domain)
print("")

# One output line per feature: "name: original -> imputed", followed by the
# "<name>_def" value in parentheses when the feature is continuous.
for var in original.domain:
    feature = original.domain[var]
    text = "%s: %s -> %s" % (feature.name, original[var], imputed[var.name])
    if feature.varType == Orange.core.VarTypes.Continuous:
        text = text + " " + "(%s)" % imputed[var.name + "_def"]
    print(text)
print("")

# Run the imputer over the whole table and print original/imputed pairs
# for instances 20..24.
impdata = imputer(bridges)
for idx in range(20, 25):
    print(bridges[idx])
    print(impdata[idx])
    print("")
Note: See TracBrowser for help on using the repository browser.