source: orange/Orange/testing/regression/tests_20/modules_fss7.py @ 9952:986e9cd806f4

Revision 9952:986e9cd806f4, 1.9 KB checked in by Miha Stajdohar <miha.stajdohar@…>, 2 years ago (diff)

Tests moved and renamed from orange 20.

Line 
# Description: Shows the use of feature subset selection and compares
#              plain naive Bayes (with discretization) and the same classifier but with
#              feature subset selection. On the crx data set, both classifiers achieve
#              similar accuracy, but naive Bayes with feature subset selection uses
#              substantially fewer features. Wrappers FilteredLearner and DiscretizedLearner
#              are used, and the example illustrates how to analyze classifiers used in
#              ten-fold cross validation (how many and which attributes were used?).
# Category:    preprocessing
# Uses:        crx.tab
# Referenced:  orngFSS.htm

import orange, orngDisc, orngTest, orngStat, orngFSS

# Load the data set; the path is relative to the current working directory.
data = orange.ExampleTable("../datasets/crx")

# Two learners are compared:
#   dBayes - naive Bayes wrapped in DiscretizedLearner,
#   fBayes - the same learner additionally wrapped in FilteredLearner, which
#            applies the `fss` attribute filter (threshold 0.05).
bayes = orange.BayesLearner()
dBayes = orngDisc.DiscretizedLearner(bayes, name='disc bayes')
fss = orngFSS.FilterAttsAboveThresh(threshold=0.05)
fBayes = orngFSS.FilteredLearner(dBayes, filter=fss, name='bayes & fss')

learners = [dBayes, fBayes]
# storeClassifiers=1: the per-fold classifiers are read back below through
# results.classifiers[fold][learner].
results = orngTest.crossValidation(learners, data, folds=10, storeClassifiers=1)

# Use the fold count reported by the results instead of repeating the literal
# 10, so the averages stay correct if `folds` above is changed.
nfolds = results.numberOfIterations

# how many attributes did each classifier use?

# natt[i] accumulates, then averages, the attribute count of learner i.
natt = [0.] * len(learners)
for fold in range(nfolds):
    for lrn in range(len(learners)):
        natt[lrn] += len(results.classifiers[fold][lrn].domain.attributes)
# The totals are floats, so this is a true division in Python 2 as well.
natt = [total / nfolds for total in natt]

# Classification accuracy is the same for every row of the table; compute it
# once rather than once per learner.
accuracy = orngStat.CA(results)

print("\nLearner         Accuracy  #Atts")
for i, learner in enumerate(learners):
    print("%-15s %5.3f     %5.2f" % (learner.name, accuracy[i], natt[i]))

# which attributes were used in filtered case?

print('\nAttribute usage (in how many folds attribute was used?):')
# Index 1 is fBayes, the filtered learner, in `learners`.
used = {}
for fold in range(nfolds):
    for att in results.classifiers[fold][1].domain.attributes:
        used[att.name] = used.get(att.name, 0) + 1
for name in used:
    print('%2d x %s' % (used[name], name))
Note: See TracBrowser for help on using the repository browser.