source: orange/docs/tutorial/rst/code/fss7.py @ 11054:eca373fb96a9

Revision 11054:eca373fb96a9, 2.0 KB checked in by blaz <blaz.zupan@…>, 16 months ago (diff)

new tutorial (refresh after removal of old files)

Line 
# Author:      B Zupan
# Version:     1.0
# Description: Shows the use of feature subset selection and compares
#              plain naive Bayes (with discretization) and the same classifier but with
#              feature subset selection. On the crx data set, both classifiers achieve
#              similar accuracy, but naive Bayes with feature subset selection uses
#              substantially fewer features. Wrappers FilteredLearner and
#              DiscretizedLearner are used, and the example illustrates how to analyze
#              classifiers used in ten-fold cross validation (how many and which
#              attributes were used?).
# Category:    preprocessing
# Uses:        crx.tab
# Referenced:  o_fss.htm

# NOTE(review): orngFSS is not referenced anywhere below; it is kept in case
# the import is needed for its side effects -- confirm before removing.
import orngFSS
import Orange

data = Orange.data.Table("crx.tab")

# Two learners are compared: discretized naive Bayes, and the same learner
# wrapped so that a feature filter (threshold 0.05) is applied first.
bayes = Orange.classification.bayes.NaiveLearner()
dBayes = Orange.feature.discretization.DiscretizedLearner(bayes, name='disc bayes')
fss = Orange.feature.selection.FilterAboveThreshold(threshold=0.05)
fBayes = Orange.feature.selection.FilteredLearner(dBayes, filter=fss, name='bayes & fss')

learners = [dBayes, fBayes]
# storeClassifiers=1 keeps the per-fold classifiers in results.classifiers so
# that their domains can be inspected below.
results = Orange.evaluation.testing.cross_validation(learners, data, folds=10, storeClassifiers=1)

# Number of folds actually evaluated; used for both the loops and the average
# so the two cannot drift apart if `folds` above is changed.
n_folds = results.numberOfIterations

# how many attributes did each classifier use?

natt = [0.] * len(learners)
for fold in range(n_folds):
    fold_classifiers = results.classifiers[fold]
    for lrn in range(len(learners)):
        natt[lrn] += len(fold_classifiers[lrn].domain.attributes)
# natt holds floats, so this is a true (non-truncating) division.
natt = [total / n_folds for total in natt]

# Classification accuracy is computed once for all learners rather than once
# per printed row.
accuracies = Orange.evaluation.scoring.CA(results)

# print(...) with a single parenthesised argument prints the same text under
# the Python 2 print statement.
print("\nLearner         Accuracy  #Atts")
for learner, accuracy, avg_atts in zip(learners, accuracies, natt):
    print("%-15s %5.3f     %5.2f" % (learner.name, accuracy, avg_atts))

# which attributes were used in filtered case?

print('\nAttribute usage (in how many folds attribute was used?):')
used = {}
for fold in range(n_folds):
    # Index 1 is fBayes, the filtered learner (second entry of `learners`).
    for att in results.classifiers[fold][1].domain.attributes:
        used[att.name] = used.get(att.name, 0) + 1
for a in used:
    print('%2d x %s' % (used[a], a))
Note: See TracBrowser for help on using the repository browser.