source: orange/Orange/doc/reference/distributions.py @ 9671:a7b056375472

Revision 9671:a7b056375472, 3.2 KB checked in by anze <anze.staric@…>, 2 years ago (diff)

Moved orange to Orange (part 2)

Line 
# Description: Shows how to compute and print attribute distributions. Also shows how to approximate a continuous distribution by a Gaussian distribution.
# Category:    statistics, distributions
# Classes:     Distribution, DiscDistribution, ContDistribution, DomainDistributions, GaussianDistribution
# Uses:        adult_sample
# Referenced:  distributions.htm
6
7import orange
8
9data = orange.ExampleTable("../datasets/adult_sample")
10
11disc = orange.Distribution("workclass", data)
12print type(disc)
13print disc
14
15print "Distribution of 'workclass' (through values)"
16workclass = data.domain["workclass"]
17for i in range(len(workclass.values)):
18    print "%20s: %5.3f" % (workclass.values[i], disc[i])
19print
20
21print "Distribution of 'workclass' (through items)"
22for val, num in disc.items():
23    print "%20s: %5.3f" % (val, num)
24print
25
26disc[0] = disc[1] = 1000
27for i in range(20):
28    print disc.modus(),
29print
30
31disc[0] = disc[1] = 1000
32for i in range(20):
33    disc[2]=i
34    print disc.modus(),
35print
36
37disc = orange.Distribution("workclass", data)
38print "Private: ", disc["Private"]
39print "Private: ", disc[0]
40print "Private: ", disc[orange.Value(workclass, "Private")]
41
42print "length of distribution:", len(disc)
43print "no. of values:", len(workclass.values)
44
45print orange.Distribution(1, data)
46print orange.Distribution(data.domain["workclass"], data)
47
48cont = orange.Distribution("education-num", data)
49print type(cont)
50print cont
51
52dist = orange.DomainDistributions(data)
53
54for d in dist:
55    if d.variable.varType == orange.VarTypes.Discrete:
56        print "%30s: %s" % (d.variable.name, d)
57    else:
58        print "%30s: avg. %5.3f" % (d.variable.name, d.average())
59
60print "*** AGE ***"
61dage = dist["age"]
62print "Native representation:", dage.native()
63print "Keys:", dage.keys()
64print "Values:", dage.values()
65print "Items: ", dage.items()
66print "Average: %5.3f" % dage.average()
67print "Var/Dev/Err: %5.3f/%5.3f/%5.3f" % (dage.var(), dage.dev(), dage.error())
68print "Quartiles: %5.3f - %5.3f - %5.3f" % (dage.percentile(25), dage.percentile(50), dage.percentile(75))
69print
70
71for x in range(170, 190):
72    print "dens(%4.1f)=%5.3f," % (x/10.0, dage.density(x/10.0)),
73   
74
75print "*** WORKCLASS ***"
76dwcl = dist["workclass"]
77print "Native representation:", dwcl.native()
78print "Keys:", dwcl.keys()
79print "Values:", dwcl.values()
80print "Items: ", dwcl.items()
81print
82
83
84disc = orange.DiscDistribution([0.5, 0.3, 0.2])
85for i in range(20):
86    print disc.random(),
87print
88
89v = orange.EnumVariable(values = ["red", "green", "blue"])
90disc.variable = v
91for i in range(20):
92    print disc.random(),
93print
94
95print
96cont = orange.ContDistribution({0.1: 12, 0.3: 3, 0.7: 3})
97print "Manually constructed continuous distibution: ", cont
98print
99
100
101cont = orange.ContDistribution(data.domain["age"])
102
103
104gauss = orange.GaussianDistribution(10, 2)
105print "*** Gauss(10, 2) ***"
106print "Average: %5.3f" % gauss.average()
107print "Var/Dev/Err: %5.3f/%5.3f/%5.3f" % (gauss.var(), gauss.dev(), gauss.error())
108print
109
110for i in range(20):
111    print "%5.3f" % gauss.random(),
112print
113
114for i in range(60, 140, 5):
115    print "dens(%4.1f)=%5.3f" % (i/10.0, gauss.density(i/10.0)),
116print
117
118#dage.normalize()
119gage = orange.GaussianDistribution(dage)
120for x in range(17, 80):
121    print "%i\t%5.3f\t%5.3f" % (x, dage.density(x), gage.density(x))
Note: See TracBrowser for help on using the repository browser.