source: orange/orange/doc/Orange/rst/code/imputation-complex.py @ 7568:b971bcbf4080

Revision 7568:b971bcbf4080, 4.4 KB checked in by tomazc <tomaz.curk@…>, 3 years ago (diff)

Documentation and code refactoring at Bohinj retreat.

Line 
1# Description: Shows the use of classes for imputation
2# Category:    imputation
3# Uses:        bridges
4# Referenced:  Orange.feature.html#imputation
5# Classes:     Orange.feature.imputation.Imputer, Orange.feature.imputation.Imputer_defaults, Orange.feature.imputation.Imputer_asValue, Orange.feature.imputation.Imputer_model, Orange.feature.imputation.ImputerConstructor, Orange.feature.imputation.ImputerConstructor_minimal, Orange.feature.imputation.ImputerConstructor_maximal, Orange.feature.imputation.ImputerConstructor_average, Orange.feature.imputation.ImputerConstructor_asValue, Orange.feature.imputation.ImputerConstructor_model
6
7import Orange
8
9table = Orange.data.Table("bridges")
10
11print "*** IMPUTING MINIMAL VALUES ***"
12imputer = Orange.feature.imputation.ImputerConstructor_minimal(table)
13print "Example w/ missing values"
14print table[19]
15print "Imputed:"
16print imputer(table[19])
17print
18
19impdata = imputer(table)
20for i in range(20, 25):
21    print table[i]
22    print impdata[i]
23    print
24
25
26print "*** IMPUTING MAXIMAL VALUES ***"
27imputer = Orange.feature.imputation.ImputerConstructor_maximal(table)
28print "Example w/ missing values"
29print table[19]
30print "Imputed:"
31print imputer(table[19])
32print
33
34impdata = imputer(table)
35for i in range(20, 25):
36    print table[i]
37    print impdata[i]
38    print
39
40
41print "*** IMPUTING AVERAGE/MAJORITY VALUES ***"
42imputer = Orange.feature.imputation.ImputerConstructor_average(table)
43print "Example w/ missing values"
44print table[19]
45print "Imputed:"
46print imputer(table[19])
47print
48
49impdata = imputer(table)
50for i in range(20, 25):
51    print table[i]
52    print impdata[i]
53    print
54
55
56print "*** MANUALLY CONSTRUCTED IMPUTER ***"
57imputer = Orange.feature.imputation.Imputer_defaults(table.domain)
58imputer.defaults["LENGTH"] = 1234
59print "Example w/ missing values"
60print table[19]
61print "Imputed:"
62print imputer(table[19])
63print
64
65impdata = imputer(table)
66for i in range(20, 25):
67    print table[i]
68    print impdata[i]
69    print
70
71
72print "*** TREE-BASED IMPUTATION ***"
73import orngTree
74imputer = Orange.feature.imputation.ImputerConstructor_model()
75imputer.learnerContinuous = imputer.learnerDiscrete = orngTree.TreeLearner(minSubset = 20)
76imputer = imputer(table)
77print "Example w/ missing values"
78print table[19]
79print "Imputed:"
80print imputer(table[19])
81print
82
83impdata = imputer(table)
84for i in range(20, 25):
85    print table[i]
86    print impdata[i]
87    print
88
89
90print "*** BAYES and AVERAGE IMPUTATION ***"
91imputer = Orange.feature.imputation.ImputerConstructor_model()
92imputer.learnerContinuous = Orange.classification.majority.MajorityLearner()
93imputer.learnerDiscrete = Orange.classification.bayes.NaiveLearner()
94imputer = imputer(table)
95print "Example w/ missing values"
96print table[19]
97print "Imputed:"
98print imputer(table[19])
99print
100impdata = imputer(table)
101for i in range(20, 25):
102    print table[i]
103    print impdata[i]
104    print
105
106
107print "*** CUSTOM IMPUTATION BY MODELS ***"
108imputer = Orange.feature.imputation.Imputer_model()
109imputer.models = [None] * len(table.domain)
110imputer.models[table.domain.index("LANES")] = Orange.classification.ConstantClassifier(2.0)
111tord = Orange.classification.ConstantClassifier(Orange.data.Value(table.domain["T-OR-D"], "THROUGH"))
112imputer.models[table.domain.index("T-OR-D")] = tord
113
114import orngTree
115len_domain = Orange.data.Domain(["MATERIAL", "SPAN", "ERECTED", "LENGTH"], table.domain)
116len_data = Orange.data.Table(len_domain, table)
117len_tree = Orange.classification.tree.TreeLearner(len_data, minSubset=20)
118imputer.models[table.domain.index("LENGTH")] = len_tree
119orngTree.printTxt(len_tree)
120
121spanVar = table.domain["SPAN"]
122def computeSpan(ex, rw):
123    if ex["TYPE"] == "WOOD" or ex["PURPOSE"] == "WALK":
124        return orange.Value(spanVar, "SHORT")
125    else:
126        return orange.Value(spanVar, "MEDIUM")
127
128imputer.models[table.domain.index("SPAN")] = computeSpan
129
130for i in range(20, 25):
131    print table[i]
132    print impdata[i]
133    print
134
135
136print "*** IMPUTATION WITH SPECIAL VALUES ***"
137imputer = Orange.feature.imputation.ImputerConstructor_asValue(table)
138original = table[19]
139imputed = imputer(table[19])
140print original.domain
141print
142print imputed.domain
143print
144
145for i in original.domain:
146    print "%s: %s -> %s" % (original.domain[i].name, original[i], imputed[i.name]),
147    if original.domain[i].varType == Orange.core.VarTypes.Continuous:
148        print "(%s)" % imputed[i.name+"_def"]
149    else:
150        print
151print
152
153impdata = imputer(table)
154for i in range(20, 25):
155    print table[i]
156    print impdata[i]
157    print
Note: See TracBrowser for help on using the repository browser.