source: orange/docs/reference/rst/code/scoring-info-lenses.py @ 9372:aef193695ea9

Revision 9372:aef193695ea9, 3.1 KB checked in by mitar, 2 years ago (diff)

Moved documentation to the separate directory.

Line 
# Description: Shows how to assess the quality of attributes
# Category:    feature scoring
# Uses:        lenses
# Referenced:  Orange.feature.html#scoring
# Classes:     Orange.feature.scoring.Measure, Orange.feature.scoring.InfoGain

import random

import Orange

# NOTE: every print below is written as a single parenthesised argument so
# that it produces the same output under the Python 2 print statement and
# also parses as a Python 3 function call.  Blank lines use print("") because
# a bare "print()" would output "()" under the Python 2 statement.

table = Orange.data.Table("lenses")

meas = Orange.feature.scoring.InfoGain()

# 1) Score a feature given by its descriptor, straight from the data table.
astigm = table.domain["astigmatic"]
print("Information gain of 'astigmatic': %6.4f" % meas(astigm, table))

# 2) Score from a precomputed VarClass contingency for the feature together
#    with the class distribution, instead of passing the data itself.
classdistr = Orange.statistics.distribution.Distribution(
    table.domain.class_var, table)
cont = Orange.statistics.contingency.VarClass("tear_rate", table)
print("Information gain of 'tear_rate': %6.4f" % meas(cont, classdistr))

# 3) Score from contingencies computed once for the whole domain; the feature
#    is selected by its index.
dcont = Orange.statistics.contingency.Domain(table)
print("Information gain of the first attribute: %6.4f" % meas(0, dcont))
print("")

print("*** A set of more exhaustive tests for different way of passing arguments to MeasureAttribute ***")

names = [a.name for a in table.domain.attributes]
attrs = len(names)

# Header row: an empty 30-character label column, then one 15-character
# column per attribute name.
print(("%30s" + "%15s" * attrs) % (("",) + tuple(names)))

# Row format matching the header: label column plus one score per attribute.
fstr = "%30s" + "%15.4f" * attrs


def _print_scores(label, scores):
    """Print one result row: `label` followed by one score per attribute.

    `scores` must contain exactly `attrs` numbers, in attribute order,
    because the row is rendered with the module-level `fstr` format.
    """
    print(fstr % ((label,) + tuple(scores)))


print("Computing information gain directly from examples")
_print_scores("- by attribute number:",
              [meas(index, table) for index in range(attrs)])
_print_scores("- by attribute name:",
              [meas(name, table) for name in names])
_print_scores("- by attribute descriptor:",
              [meas(attr, table) for attr in table.domain.attributes])
print("")

# `dcont` from step 3 is reused here: the table has not been modified since
# it was computed, so building the domain contingency again would only
# repeat the same work.
print("Computing information gain from DomainContingency")
_print_scores("- by attribute number:",
              [meas(index, dcont) for index in range(attrs)])
_print_scores("- by attribute name:",
              [meas(name, dcont) for name in names])
_print_scores("- by attribute descriptor:",
              [meas(attr, dcont) for attr in table.domain.attributes])
print("")

# This section builds a separate VarClass contingency per attribute and pairs
# it with the class distribution.  The heading used to repeat the
# "DomainContingency" title of the previous section, which mislabelled it.
print("Computing information gain from VarClass contingency and class distribution")
cdist = Orange.statistics.distribution.Distribution(
    table.domain.class_var, table)
var_class = Orange.statistics.contingency.VarClass
_print_scores("- by attribute number:",
              [meas(var_class(index, table), cdist) for index in range(attrs)])
_print_scores("- by attribute name:",
              [meas(var_class(name, table), cdist) for name in names])
_print_scores("- by attribute descriptor:",
              [meas(var_class(attr, table), cdist)
               for attr in table.domain.attributes])
print("")

# Build a new discrete feature with one value per combination of the values
# of the features at domain positions 2 and 3.
values = ["v%i" % i
          for i in range(len(table.domain[2].values) * len(table.domain[3].values))]
cartesian = Orange.data.variable.Discrete("cart", values=values)
# NOTE(review): the lookup classifier presumably lets Orange derive the value
# of `cartesian` from the two source features on demand -- confirm against
# the Orange.classification.lookup documentation.
cartesian.get_value_from = Orange.classification.lookup.ClassifierByLookupTable(
    cartesian, table.domain[2], table.domain[3], values)

print("Information gain of Cartesian product of %s and %s: %6.4f" % (
    table.domain[2].name, table.domain[3].name, meas(cartesian, table)))

# Register a two-valued discrete meta attribute on the domain and add it to
# every example in the table.
mid = Orange.core.newmetaid()
table.domain.add_meta(mid, Orange.data.variable.Discrete(values=["v0", "v1"]))
table.add_meta_attribute(mid)

# Fill the meta attribute with pseudo-random 0/1 values; the generator is
# seeded with a fixed value so that repeated runs assign the same values.
rg = random.Random()
rg.seed(0)
for ex in table:
    ex[mid] = Orange.data.Value(rg.randint(0, 1))

print("Information gain for a random meta attribute: %6.4f" % meas(mid, table))
Note: See TracBrowser for help on using the repository browser.