source: orange/orange/orngSVM_Jakulin.py @ 2213:7838dd2c80b1

Revision 2213:7838dd2c80b1, 8.7 KB checked in by ales_erjavec <ales.erjavec@…>, 8 years ago (diff)

* empty log message *

Line 
1# ORANGE Support Vector Machines
2#    by Alex Jakulin (jakulin@acm.org)
3#
4#       based on:
5#           Chih-Chung Chang and Chih-Jen Lin
6#           LIBSVM : a library for support vector machines.
7#           http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.ps.gz
8# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
9#
10# CVS Status: $Id$
11#
12# Version 1.8 (18/11/2003)
13#   - added error checking, updated to libsvm 2.5
14#
15# Version 1.7 (11/08/2002)
16#   - Assertion error was resulting because Orange incorrectly compared the
17#     attribute values as returned from .getclass() and the values array in
18#     the attribute definition. Cast both values to integer before comparison.
19#   - Support for setting the preferred ordinal attribute transformation.
20#
21# Version 1.6 (31/10/2001)
22#
23# To Do:
24#   - class-weighting SVM (each class c of c_1..c_k is weighted by k/p(c))
25#
26#   - make sure everything works when given an empty example table
27#
28#
29
30import orange
31import orngCRS
32import orng2Array
33import math
34
class BasicSVMLearner(orange.Learner):
    """Orange learner wrapping the LIBSVM trainer exposed by orngCRS.

    Configure the public attributes after construction, then call the
    instance with an ExampleTable to train; the call returns a
    BasicSVMClassifier.
    """

    def __init__(self):
        self._name = "SVM Learner Wrap"

        # SVM formulation. -1 picks C-SVC / epsilon-SVR automatically from
        # the class variable type, -2 picks the nu variants, -3 one-class.
        # (LIBSVM codes: C_SVC=0, NU_SVC=1, ONE_CLASS=2, EPS_SVR=3, NU_SVR=4.)
        # SV-classifier : "ordinary" SVM
        # nu-SV-classifier : nu controls the complexity of the model
        # ONE_CLASS: only one class -- is something in or out
        # epsilon-SVR: epsilon-regression
        # nu-SVR: nu-regression
        self.type = -1  # -1: classical, -2: NU, -3: OC

        # kernel type: (LINEAR=0, POLY, RBF, SIGMOID=3)
        # linear: x[i].x[j]
        # poly: pow(gamma*x[i].x[j]+coef0, degree)
        # rbf: exp(-gamma*(x[i]^2+x[j]^2-2*x[i].x[j]))
        # sigm: tanh(gamma*x[i].x[j]+coef0)
        self.kernel = 2

        # polynomial degree (POLY kernel only)
        self.degree = 3

        # poly/rbf/sigm parameter
        # if 0.0, it is assigned the default value of 1.0/#attributes
        self.gamma = 0.0

        # poly/sigm additive constant
        self.coef0 = 0.0

        # complexity control with NU_SVC, NU_SVR and ONE_CLASS:
        # the bigger, the less complex the model
        self.nu = 0.5

        # kernel cache size in MB
        self.cache_size = 40

        # epsilon-tube width for epsilon-SVR
        self.p = 0.5

        # for SVC, SVR and NU_SVR:
        # greater the cost of misclassification, greater the likelihood of
        # overfitting
        self.C = 1.0

        # stopping tolerance
        self.eps = 1e-3

        # shrinking heuristic (1=on, 0=off)
        self.shrinking = 1

        # probability estimates (1=on, 0=off)
        self.probability = 0

        # per-class weights; an empty list means unweighted training
        self.classweights = []

        # attribute transformation modes forwarded to orng2Array
        self.translation_mode_d = 1
        self.translation_mode_c = 1
        self.for_nomogram = 0

        self.normalize = 0

    def getmodel(self, data, fulldata):
        """Train a LIBSVM model on *data*.

        If *fulldata* is given (non-zero), the attribute-range analysis is
        done on it instead of *data*.  Returns a ``(model, translator)``
        pair, where *translator* is the orng2Array.DomainTranslation used
        to map examples into LIBSVM's sparse format.

        Raises ValueError when the configured nu is infeasible for nu-SVC.
        """
        # Regression must be used for continuous classes and classification
        # for discrete ones (varType: 1 = discrete, 2 = continuous).
        assert(data.domain.classVar.varType == 1 or data.domain.classVar.varType == 2)
        typ = self.type
        if typ == -1:    # classical: choose by class variable type
            if data.domain.classVar.varType == 2:  # continuous class
                typ = 3  # epsilon-regression
            else:        # discrete class
                typ = 0  # C-classification
        elif typ == -2:  # nu variants
            if data.domain.classVar.varType == 2:  # continuous class
                typ = 4  # nu-regression
            else:        # discrete class
                typ = 1  # nu-classification
        elif typ == -3:  # OC
            typ = 2      # one-class; the class variable is ignored

        # Parameter error checking; integers are promoted to the floats
        # LIBSVM expects.
        if type(self.degree) == type(1):
            self.degree = float(self.degree)
        if type(self.cache_size) == type(1):
            self.cache_size = float(self.cache_size)
        assert(type(self.degree) == type(1.0))
        assert(type(self.gamma) == type(1.0))
        assert(type(self.coef0) == type(1.0))
        assert(type(self.nu) == type(1.0))
        assert(type(self.cache_size) == type(1.0))
        assert(type(self.C) == type(1.0))
        assert(type(self.eps) == type(1.0))
        assert(type(self.p) == type(1.0))
        assert(typ in [0, 1, 2, 3, 4])
        assert(self.kernel in [0, 1, 2, 3])
        assert(self.cache_size > 0)
        assert(self.eps > 0)
        assert(self.nu <= 1.0 and self.nu >= 0.0)
        assert(self.p >= 0.0)
        assert(self.shrinking in [0, 1])
        assert(self.probability in [0, 1])
        # For nu-SVC, nu must be feasible with respect to the class
        # distribution.  BUGFIX: the original compared the *builtin* `type`
        # to 1 ("if type == 1:"), so this check never ran.
        if typ == 1:
            counts = [0] * len(data.domain.classVar.values)
            for x in data:
                counts[int(x.getclass())] += 1
            for i in range(1, len(counts)):
                for j in range(i):
                    if self.nu * (counts[i] + counts[j]) > 2 * min(counts[i], counts[j]):
                        # BUGFIX: was a string exception ('raise "..."'),
                        # which is illegal in modern Python.
                        raise ValueError("Infeasible nu value.")

        puredata = orange.Filter_hasClassValue(data)
        translate = orng2Array.DomainTranslation(self.translation_mode_d, self.translation_mode_c)
        if fulldata != 0:
            # analyse value ranges on the full data set when available
            purefulldata = orange.Filter_hasClassValue(fulldata)
            translate.analyse(purefulldata)
        else:
            translate.analyse(puredata)
        translate.prepareSVM(not self.for_nomogram)
        mdata = translate.transform(puredata)

        if len(self.classweights) == 0:
            model = orngCRS.SVMLearn(mdata, typ, self.kernel, self.degree, self.gamma, self.coef0, self.nu, self.cache_size, self.C, self.eps, self.p, self.shrinking, self.probability, 0, [], [])
        else:
            # class weights are given in the order of the class values
            assert(len(puredata.domain.classVar.values) == len(self.classweights))
            cvals = [data.domain.classVar(i) for i in data.domain.classVar.values]
            labels = translate.transformClass(cvals)
            model = orngCRS.SVMLearn(mdata, typ, self.kernel, self.degree, self.gamma, self.coef0, self.nu, self.cache_size, self.C, self.eps, self.p, self.shrinking, self.probability, len(self.classweights), self.classweights, labels)
        return (model, translate)

    def __call__(self, data, weights=0, fulldata=0):
        """Train on *data* and return a BasicSVMClassifier.

        Note that *weights* are ignored (kept for Learner compatibility).
        """
        (model, translate) = self.getmodel(data, fulldata)
        return BasicSVMClassifier(model, translate,
                                  normalize=(self.normalize or self.for_nomogram))
166
class BasicSVMClassifier(orange.Classifier):
    """Orange classifier wrapping a trained LIBSVM model (via orngCRS).

    For a linear two-class C-SVC trained with ``normalize`` set, the
    separating hyperplane is rescaled to a unit-length weight vector so
    margins are comparable (used by the nomogram).
    """

    def __init__(self, model, translate, normalize):
        self._name = "SVM Classifier Wrap"
        self.model = model
        self.cmodel = orngCRS.SVMClassifier(model)
        self.translate = translate
        self.normalize = normalize
        if model["svm_type"] in [0, 1]:
            # classification model (C-SVC or nu-SVC)
            self.classifier = 1
            # probability outputs exist only if the model was trained with
            # probability estimates (uses `in` instead of deprecated has_key)
            if "ProbA" in model and "ProbB" in model:
                self.probabilistic = 1
            else:
                self.probabilistic = 0
            # map LIBSVM's internal label order back to Orange class values
            self.classLUT = [self.translate.getClass(q) for q in model["label"]]
            self.iclassLUT = [int(q) for q in self.classLUT]
        else:
            # regression / one-class: no class labels, no probabilities
            self.probabilistic = 0
            self.classifier = 0

        if normalize and model['kernel_type'] == 0 and model["svm_type"] == 0 and model["nr_class"] == 2:
            beta = model["rho"][0]
            svs = model["SV"]
            # the highest attribute index used by any support vector gives
            # the length of the weight vector
            # (assumes each SV's index/value pairs are sorted by index —
            # TODO confirm against orngCRS)
            ll = -1
            for i in range(model["total_sv"]):
                ll = max(ll, svs[i][-1][0])
            xcoeffs = [0.0] * ll
            # accumulate w = sum_i coef_i * sv_i (sparse 1-based indices)
            for i in range(model["total_sv"]):
                csv = svs[i]
                coef = csv[0][0]
                for (j, v) in csv[1:]:
                    xcoeffs[j - 1] += coef * v
            # squared norm of w (renamed from `sum`, which shadowed the
            # builtin of the same name)
            sqnorm = 0.0
            for x in xcoeffs:
                sqnorm += x * x
            self.coefficient = 1.0 / math.sqrt(sqnorm)
            self.xcoeffs = [x * self.coefficient for x in xcoeffs]
            self.beta = beta * self.coefficient
        else:
            self.coefficient = 1.0

    def getmargin(self, example):
        """Return the (optionally normalized) margin of *example*.

        Only meaningful for two-class problems; returns 0.0 when the model
        has a single class.
        """
        assert(self.model['nr_class'] <= 2)  # only for 2-class problems
        if self.model['nr_class'] == 2:
            td = self.translate.extransform(example)
            margin = orngCRS.SVMClassifyM(self.cmodel, td)
            if self.normalize:
                return margin[0] * self.coefficient
            else:
                return margin[0]
        else:
            # it can happen that there is a single class
            return 0.0

    def __call__(self, example, format=orange.GetValue):
        """Classify *example*; honors GetValue/GetBoth/GetProbabilities."""
        td = self.translate.extransform(example)
        x = orngCRS.SVMClassify(self.cmodel, td)
        v = self.translate.getClass(x)
        if self.probabilistic:
            px = orngCRS.SVMClassifyP(self.cmodel, td)
            p = [0.0] * len(self.translate.cv.attr.values)
            for i in range(len(self.iclassLUT)):
                p[self.iclassLUT[i]] = px[i]
        elif self.model['svm_type'] == 0 or self.model['svm_type'] == 1:
            # non-probabilistic classification: all mass on predicted class
            p = [0.0] * len(self.translate.cv.attr.values)
            p[int(v)] = 1.0

        # Do not return a probability distribution for regression or
        # one-class models.  BUGFIX: nu-SVR (svm_type 4) is now included;
        # previously it fell through and raised a NameError on
        # GetBoth/GetProbabilities because `p` was never assigned.
        if format == orange.GetValue or self.model['svm_type'] in [2, 3, 4]:
            return v
        if format == orange.GetBoth:
            return (v, p)
        if format == orange.GetProbabilities:
            return p

    def __del__(self):
        # release the C-side LIBSVM model
        orngCRS.svm_destroy_model(self.cmodel)
Note: See TracBrowser for help on using the repository browser.