source: orange/Orange/doc/reference/MeasureAttribute1.py @ 9671:a7b056375472

Revision 9671:a7b056375472, 2.8 KB checked in by anze <anze.staric@…>, 2 years ago (diff)

Moved orange to Orange (part 2)

Line 
# Description: Shows how to assess the quality of attributes
# Category:    attribute quality
# Classes:     MeasureAttribute, MeasureAttribute_info,
# Uses:        lenses
# Referenced:  MeasureAttribute.htm

import random

import orange

# NOTE: every print below is written as print(<single expression>). Under the
# Python 2 print statement this emits exactly the same text as the bare
# statement form, and it stays valid if the script is parsed with the print
# function enabled.

data = orange.ExampleTable("lenses")

# The measure object used for every score printed by this script.
meas = orange.MeasureAttribute_info()

# --- Three basic ways of calling the measure --------------------------------

# (a) attribute descriptor + examples
astigm = data.domain["astigmatic"]
print("Information gain of 'astigmatic': %6.4f" % meas(astigm, data))

# (b) attribute-class contingency + class distribution
classdistr = orange.Distribution(data.domain.classVar, data)
cont = orange.ContingencyAttrClass("tear_rate", data)
print("Information gain of 'tear_rate': %6.4f" % meas(cont, classdistr))

# (c) attribute index + contingencies computed for the whole domain
dcont = orange.DomainContingency(data)
print("Information gain of the first attribute: %6.4f" % meas(0, dcont))
print("")

print("*** A set of more exhaustive tests for different way of passing arguments to MeasureAttribute ***")

names = [a.name for a in data.domain.attributes]
attrs = len(names)

# Table header: an empty 30-character label cell, then one column per attribute.
print(("%30s" + "%15s" * attrs) % (("",) + tuple(names)))

# Row format: 30-character label, then one 4-decimal score per attribute.
fstr = "%30s" + "%15.4f" * attrs


def _print_row(label, scores):
    """Print one table row: `label` followed by one score per attribute."""
    print(fstr % ((label,) + tuple(scores)))


print("Computing information gain directly from examples")
_print_row("- by attribute number:", [meas(i, data) for i in range(attrs)])
_print_row("- by attribute name:", [meas(name, data) for name in names])
_print_row("- by attribute descriptor:",
           [meas(attr, data) for attr in data.domain.attributes])
print("")

# `dcont` from above is reused: `data` has not been modified since it was built.
print("Computing information gain from DomainContingency")
_print_row("- by attribute number:", [meas(i, dcont) for i in range(attrs)])
_print_row("- by attribute name:", [meas(name, dcont) for name in names])
_print_row("- by attribute descriptor:",
           [meas(attr, dcont) for attr in data.domain.attributes])
print("")

# This section builds a separate attribute-class contingency for each
# attribute and scores it against the class distribution (`classdistr` from
# above, computed from the same unmodified data). The heading used to repeat
# the DomainContingency label of the previous section.
print("Computing information gain from ContingencyAttrClass and class distribution")
_print_row("- by attribute number:",
           [meas(orange.ContingencyAttrClass(i, data), classdistr)
            for i in range(attrs)])
_print_row("- by attribute name:",
           [meas(orange.ContingencyAttrClass(name, data), classdistr)
            for name in names])
_print_row("- by attribute descriptor:",
           [meas(orange.ContingencyAttrClass(attr, data), classdistr)
            for attr in data.domain.attributes])
print("")

# --- Attribute that is not in the domain: Cartesian product of two others ----

first, second = data.domain[2], data.domain[3]
# One symbolic value per combination of values of the two source attributes.
values = ["v%i" % i for i in range(len(first.values) * len(second.values))]
cartesian = orange.EnumVariable("cart", values=values)
# NOTE(review): presumably getValueFrom lets orange derive the value of
# `cartesian` from `first` and `second` through the lookup table -- confirm
# against the Orange reference.
cartesian.getValueFrom = orange.ClassifierByLookupTable(cartesian, first, second, values)

print("Information gain of Cartesian product of %s and %s: %6.4f"
      % (first.name, second.name, meas(cartesian, data)))

# --- Meta attribute filled with random binary values -------------------------

mid = orange.newmetaid()
data.domain.addmeta(mid, orange.EnumVariable(values=["v0", "v1"]))
data.addMetaAttribute(mid)

# Fixed seed so that the random values are the same on every run.
rg = random.Random()
rg.seed(0)
for ex in data:
    ex[mid] = orange.Value(rg.randint(0, 1))

print("Information gain for a random meta attribute: %6.4f" % meas(mid, data))
Note: See TracBrowser for help on using the repository browser.