source: orange/docs/tutorial/rst/code/fss7.py @ 9374:59bac7ddd8a2

Revision 9374:59bac7ddd8a2, 1.9 KB checked in by mitar, 2 years ago (diff)

Tutorial documentation structure.

Line 
# Author:      B Zupan
# Version:     1.0
# Description: Shows the use of feature subset selection and compares
#              plain naive Bayes (with discretization) and the same classifier but with
#              feature subset selection. On the crx data set, both classifiers achieve similar
#              accuracy, but naive Bayes with feature subset selection uses substantially
#              fewer features. Wrappers FilteredLearner and DiscretizedLearner are used,
#              and the example illustrates how to analyze classifiers used in ten-fold cross
#              validation (how many and which attributes were used?).
# Category:    preprocessing
# Uses:        crx.tab
# Referenced:  o_fss.htm

import orange
import orngDisc
import orngTest
import orngStat
import orngFSS

# Load the crx data set that both learners are evaluated on.
data = orange.ExampleTable("../../datasets/crx")

# First learner: naive Bayes wrapped in a discretizing learner.
bayes = orange.BayesLearner()
dBayes = orngDisc.DiscretizedLearner(bayes, name='disc bayes')

# Second learner: the same discretized Bayes, wrapped once more so that a
# feature subset selection filter (built with threshold=0.05) is applied.
fss = orngFSS.FilterAttsAboveThresh(threshold=0.05)
fBayes = orngFSS.FilteredLearner(dBayes, filter=fss, name='bayes & fss')

# Cross-validate both learners in ten folds. storeClassifiers=1 asks for the
# per-fold classifiers to be kept; the code below reads them back through
# results.classifiers[fold][learner_index].
learners = [dBayes, fBayes]
results = orngTest.crossValidation(learners, data, folds=10, storeClassifiers=1)

# how many attributes did each classifier use?

# For every learner, add up the number of attributes in the domain of the
# classifier stored for each cross-validation iteration.
n_iterations = results.numberOfIterations
natt = [0.] * len(learners)
for fold in range(n_iterations):
  for lrn in range(len(learners)):
    natt[lrn] += len(results.classifiers[fold][lrn].domain.attributes)

# Average over the actual number of iterations instead of a hard-coded 10,
# so the figure stays correct if the number of folds is ever changed.
# natt holds floats, so this is true division under Python 2 as well.
natt = [total / n_iterations for total in natt]

# Classification accuracies are the same for every row of the table, so
# compute them once rather than once per learner.
accuracies = orngStat.CA(results)

# A parenthesised single argument prints identically under the Python 2
# print statement.
print("\nLearner         Accuracy  #Atts")
for i, learner in enumerate(learners):
  print("%-15s %5.3f     %5.2f" % (learner.name, accuracies[i], natt[i]))

# which attributes were used in filtered case?

print('\nAttribute usage (in how many folds attribute was used?):')

# Map attribute name -> number of folds whose classifier kept that attribute.
# Index 1 selects the second entry of `learners`, i.e. the learner with
# feature subset selection.
used = {}
for fold in range(results.numberOfIterations):
  for att in results.classifiers[fold][1].domain.attributes:
    # dict.get avoids building and scanning a key list for every attribute.
    used[att.name] = used.get(att.name, 0) + 1

for name, count in used.items():
  print('%2d x %s' % (count, name))
Note: See TracBrowser for help on using the repository browser.