source: orange/Orange/classification/svm/__init__.py @ 10300:28e3a1a057c6

Revision 10300:28e3a1a057c6, 31.0 KB checked in by Miha Stajdohar <miha.stajdohar@…>, 2 years ago (diff)

Attribute weight had wrong default () -> [].

Line 
1"""
2.. index:: support vector machines (SVM)
3.. index:
4   single: classification; support vector machines (SVM)
5   
6*********************************
7Support Vector Machines (``svm``)
8*********************************
9
10This is a module for `Support Vector Machine`_ (SVM) classification. It
11exposes the underlying `LibSVM`_ and `LIBLINEAR`_ libraries in a standard
12Orange Learner/Classifier interface.
13
14Choosing the right learner
15==========================
16
17Choose an SVM learner suitable for the problem.
18:obj:`SVMLearner` is a general SVM learner. :obj:`SVMLearnerEasy` will
19help with the data normalization and parameter tuning. Learn with a fast
20:obj:`LinearSVMLearner` on data sets with a large number of features.
21
22.. note:: SVM can perform poorly on some data sets. Choose the parameters
23          carefully. In cases of low classification accuracy, try scaling the
24          data and experiment with different parameters. \
25          :obj:`SVMLearnerEasy` class does this automatically (it is similar
26          to the `svm-easy.py` script in the LibSVM distribution).
27
28         
29SVM learners (from `LibSVM`_)
30=============================
31
32The most basic :class:`SVMLearner` implements the standard `LibSVM`_ learner.
33It supports four built-in kernel types (Linear, Polynomial, RBF and Sigmoid).
34Additionally kernel functions defined in Python can be used instead.
35
36.. note:: For learning from ordinary :class:`Orange.data.Table` use the \
37    :class:`SVMLearner`. For learning from sparse dataset (i.e.
38    data in `basket` format) use the :class:`SVMLearnerSparse` class.
39
40.. autoclass:: Orange.classification.svm.SVMLearner
41    :members:
42
43.. autoclass:: Orange.classification.svm.SVMLearnerSparse
44    :members:
45    :show-inheritance:
46   
47.. autoclass:: Orange.classification.svm.SVMLearnerEasy
48    :members:
49    :show-inheritance:
50
51The next example shows how to use SVM learners and that :obj:`SVMLearnerEasy`
52with automatic data preprocessing and parameter tuning
53outperforms :obj:`SVMLearner` with the default :obj:`~SVMLearner.nu` and :obj:`~SVMLearner.gamma`: 
54   
55.. literalinclude:: code/svm-easy.py
56
57
58   
59Linear SVM learners (from `LIBLINEAR`_)
60=======================================
61
62The :class:`LinearSVMLearner` learner is more suitable for large scale
63problems as it is significantly faster than :class:`SVMLearner` and its
64subclasses. A downside is that it only supports a linear kernel (as the name
65suggests) and does not support probability estimation for the
66classifications. Furthermore, a multi-class SVM learner
67:class:`MultiClassSVMLearner` is provided.
68   
69.. autoclass:: Orange.classification.svm.LinearSVMLearner
70   :members:
71   
72.. autoclass:: Orange.classification.svm.MultiClassSVMLearner
73   :members:
74   
75   
76SVM Based feature selection and scoring
77=======================================
78
79.. autoclass:: Orange.classification.svm.RFE
80
81.. autoclass:: Orange.classification.svm.ScoreSVMWeights
82    :show-inheritance:
83 
84 
85Utility functions
86=================
87
88.. automethod:: Orange.classification.svm.max_nu
89
90.. automethod:: Orange.classification.svm.get_linear_svm_weights
91
92.. automethod:: Orange.classification.svm.table_to_svm_format
93
94The following example shows how to get linear SVM weights:
95   
96.. literalinclude:: code/svm-linear-weights.py   
97
98
99.. _kernel-wrapper:
100
101Kernel wrappers
102===============
103
104Kernel wrappers are helper classes used to build custom kernels for use
105with :class:`SVMLearner` and subclasses. All wrapper constructors take
106one or more Python functions (`wrapped` attribute) to wrap. The
107function must be a positive definite kernel, taking two arguments of
108type :class:`Orange.data.Instance` and returning a float.
109
110.. autoclass:: Orange.classification.svm.kernels.KernelWrapper
111   :members:
112
113.. autoclass:: Orange.classification.svm.kernels.DualKernelWrapper
114   :members:
115
116.. autoclass:: Orange.classification.svm.kernels.RBFKernelWrapper
117   :members:
118
119.. autoclass:: Orange.classification.svm.kernels.PolyKernelWrapper
120   :members:
121
122.. autoclass:: Orange.classification.svm.kernels.AdditionKernelWrapper
123   :members:
124
125.. autoclass:: Orange.classification.svm.kernels.MultiplicationKernelWrapper
126   :members:
127
128.. autoclass:: Orange.classification.svm.kernels.CompositeKernelWrapper
129   :members:
130
131.. autoclass:: Orange.classification.svm.kernels.SparseLinKernel
132   :members:
133
134Example:
135
136.. literalinclude:: code/svm-custom-kernel.py
137
138.. _`Support Vector Machine`: http://en.wikipedia.org/wiki/Support_vector_machine
139.. _`LibSVM`: http://www.csie.ntu.edu.tw/~cjlin/libsvm/
140.. _`LIBLINEAR`: http://www.csie.ntu.edu.tw/~cjlin/liblinear/
141
142"""
143
144import math
145
146from collections import defaultdict
147
148import Orange.core
149import Orange.data
150import Orange.misc
151import Orange.feature
152
153import kernels
154import warnings
155
156from Orange.core import SVMLearner as _SVMLearner
157from Orange.core import SVMLearnerSparse as _SVMLearnerSparse
158from Orange.core import LinearClassifier, \
159                        LinearLearner, \
160                        SVMClassifier, \
161                        SVMClassifierSparse
162
163from Orange.preprocess import Preprocessor_impute, \
164                              Preprocessor_continuize, \
165                              Preprocessor_preprocessorList, \
166                              DomainContinuizer
167
168from Orange import feature as variable
169
170from Orange.misc import _orange__new__
171
def max_nu(data):
    """Return the maximum nu parameter for Nu_SVC support vector learning
    for the given data table.

    :param data: Data with discrete class variable
    :type data: Orange.data.Table

    """
    # Class counts, with empty classes dropped up front.  For each pair of
    # non-empty classes (n1, n2) the feasibility bound on nu is
    # 2 * min(n1, n2) / (n1 + n2); the overall maximum feasible nu is the
    # smallest such bound, capped at 1.0.
    counts = [n for n in
              Orange.core.Distribution(data.domain.classVar, data) if n != 0]
    bound = 1.0
    for index, first in enumerate(counts):
        for second in counts[index + 1:]:
            bound = min(bound, 2.0 * min(first, second) / (first + second))
    return bound

maxNu = max_nu
190
class SVMLearner(_SVMLearner):
    """
    :param svm_type: defines the SVM type (can be C_SVC, Nu_SVC
        (default), OneClass, Epsilon_SVR, Nu_SVR)
    :type svm_type: SVMLearner.SVMType
    :param kernel_type: defines the kernel type for learning
        (can be kernels.RBF (default), kernels.Linear, kernels.Polynomial,
        kernels.Sigmoid, kernels.Custom)
    :type kernel_type: SVMLearner.Kernel
    :param degree: kernel parameter (for Polynomial) (default 3)
    :type degree: int
    :param gamma: kernel parameter (Polynomial/RBF/Sigmoid)
        (default 1.0/num_of_features)
    :type gamma: float
    :param coef0: kernel parameter (Polynomial/Sigmoid) (default 0)
    :type coef0: int
    :param kernel_func: function that will be called if `kernel_type` is
        `kernels.Custom`. It must accept two :obj:`Orange.data.Instance`
        arguments and return a float (see :ref:`kernel-wrapper` for some
        examples).
    :type kernel_func: callable function
    :param C: C parameter for C_SVC, Epsilon_SVR and Nu_SVR
    :type C: float
    :param nu: Nu parameter for Nu_SVC, Nu_SVR and OneClass (default 0.5)
    :type nu: float
    :param p: epsilon in loss-function for Epsilon_SVR
    :type p: float
    :param cache_size: cache memory size in MB (default 200)
    :type cache_size: int
    :param eps: tolerance of termination criterion (default 0.001)
    :type eps: float
    :param probability: build a probability model
        (default False)
    :type probability: bool
    :param shrinking: use shrinking heuristics
        (default True)
    :type shrinking: bool
    :param weight: a list of class weights (default None, treated as an
        empty list, i.e. all classes weighted equally)
    :type weight: list

    Example:

        >>> import Orange
        >>> from Orange.classification import svm
        >>> from Orange.evaluation import testing, scoring
        >>> table = Orange.data.Table("vehicle.tab")
        >>> learner = svm.SVMLearner()
        >>> results = testing.cross_validation([learner], table, folds=5)
        >>> print scoring.CA(results)[0]
        0.789613644274

    """
    __new__ = _orange__new__(_SVMLearner)

    # SVM type constants re-exported from the underlying C++ learner.
    C_SVC = _SVMLearner.C_SVC
    Nu_SVC = _SVMLearner.Nu_SVC
    OneClass = _SVMLearner.OneClass
    Nu_SVR = _SVMLearner.Nu_SVR
    Epsilon_SVR = _SVMLearner.Epsilon_SVR

    @Orange.misc.deprecated_keywords({"kernelFunc": "kernel_func"})
    def __init__(self, svm_type=Nu_SVC, kernel_type=kernels.RBF,
                 kernel_func=None, C=1.0, nu=0.5, p=0.1, gamma=0.0, degree=3,
                 coef0=0, shrinking=True, probability=True, verbose=False,
                 cache_size=200, eps=0.001, normalization=True,
                 weight=None, **kwargs):
        self.svm_type = svm_type
        self.kernel_type = kernel_type
        self.kernel_func = kernel_func
        self.C = C
        self.nu = nu
        self.p = p
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.shrinking = shrinking
        self.probability = probability
        self.verbose = verbose
        self.cache_size = cache_size
        self.eps = eps
        self.normalization = normalization
        for key, val in kwargs.items():
            setattr(self, key, val)
        self.learner = Orange.core.SVMLearner(**kwargs)
        # Use None as the default instead of a mutable [] default argument,
        # which would be shared by every instance constructed without an
        # explicit weight list.
        self.weight = weight if weight is not None else []

    max_nu = staticmethod(max_nu)

    def __call__(self, data, weight=0):
        """Construct a SVM classifier

        :param table: data with continuous features
        :type table: Orange.data.Table

        :param weight: unused - use the constructors ``weight``
            parameter to set class weights

        """

        examples = Orange.core.Preprocessor_dropMissingClasses(data)
        class_var = examples.domain.class_var
        if len(examples) == 0:
            raise ValueError("Example table is without any defined classes")

        # Fix the svm_type parameter if we have a class_var/svm_type mismatch:
        # classification types (0, 1) on a continuous class become the
        # corresponding regression types (3, 4), and vice versa.
        if self.svm_type in [0, 1] and \
            isinstance(class_var, Orange.feature.Continuous):
            self.svm_type += 3
        if self.svm_type in [3, 4] and \
            isinstance(class_var, Orange.feature.Discrete):
            self.svm_type -= 3
        if self.kernel_type == kernels.Custom and not self.kernel_func:
            raise ValueError("Custom kernel function not supplied")

        # Clamp nu just below the feasibility bound for Nu_SVC (LibSVM
        # rejects infeasible nu values outright).
        nu = self.nu
        if self.svm_type == SVMLearner.Nu_SVC:
            max_nu = self.max_nu(examples)
            if self.nu > max_nu:
                if getattr(self, "verbose", 0):
                    warnings.warn("Specified nu %.3f is infeasible. \
                    Setting nu to %.3f" % (self.nu, max_nu))
                nu = max(max_nu - 1e-7, 0.0)

        # Push the (possibly adjusted) parameters down to the C++ learner.
        for name in ["svm_type", "kernel_type", "kernel_func", "C", "nu", "p",
                     "gamma", "degree", "coef0", "shrinking", "probability",
                     "verbose", "cache_size", "eps"]:
            setattr(self.learner, name, getattr(self, name))
        self.learner.nu = nu
        self.learner.set_weights(self.weight)

        if self.svm_type == SVMLearner.OneClass and self.probability:
            self.learner.probability = False
            warnings.warn("One-class SVM probability output not supported yet.")
        return self.learn_classifier(examples)

    def learn_classifier(self, data):
        # When normalizing, the classifier must be wrapped so that examples
        # are translated into the normalized domain at prediction time.
        if self.normalization:
            data = self._normalize(data)
            svm = self.learner(data)
            return SVMClassifierWrapper(svm)
        return self.learner(data)

    @Orange.misc.deprecated_keywords({"progressCallback": "progress_callback"})
    def tune_parameters(self, data, parameters=None, folds=5, verbose=0,
                       progress_callback=None):
        """Tune the ``parameters`` on given ``data`` using
        cross validation.

        :param data: data for parameter tuning
        :type data: Orange.data.Table
        :param parameters: defaults to ["nu", "C", "gamma"]
        :type parameters: list of strings
        :param folds: number of folds used for cross validation
        :type folds: int
        :param verbose: default False
        :type verbose: bool
        :param progress_callback: report progress
        :type progress_callback: callback function

        An example that tunes the `gamma` parameter on `data` using 3-fold cross
        validation. ::

            svm = Orange.classification.svm.SVMLearner()
            svm.tune_parameters(table, parameters=["gamma"], folds=3)

        """

        import orngWrap

        if parameters is None:
            parameters = ["nu", "C", "gamma"]

        searchParams = []
        # Normalize once up front and temporarily disable normalization so
        # each cross-validation fold does not re-normalize.
        normalization = self.normalization
        if normalization:
            data = self._normalize(data)
            self.normalization = False
        if self.svm_type in [SVMLearner.Nu_SVC, SVMLearner.Nu_SVR] \
                    and "nu" in parameters:
            # Candidate nu values must stay strictly below the feasibility
            # bound (only relevant for a discrete class).
            if isinstance(data.domain.class_var, variable.Discrete):
                max_nu = max(self.max_nu(data) - 1e-7, 0.0)
            else:
                max_nu = 1.0
            searchParams.append(("nu", [i / 10.0 for i in range(1, 9) if \
                                        i / 10.0 < max_nu] + [max_nu]))
        elif "C" in parameters:
            searchParams.append(("C", [2 ** a for a in  range(-5, 15, 2)]))
        if self.kernel_type == 2 and "gamma" in parameters:
            searchParams.append(("gamma", [2 ** a for a in range(-5, 5, 2)] + [0]))
        tunedLearner = orngWrap.TuneMParameters(object=self,
                            parameters=searchParams,
                            folds=folds,
                            returnWhat=orngWrap.TuneMParameters.returnLearner,
                            progressCallback=progress_callback
                            if progress_callback else lambda i:None)
        tunedLearner(data, verbose=verbose)
        if normalization:
            self.normalization = normalization

    def _normalize(self, data):
        # Span-normalize continuous features, expand multinomial features to
        # N indicator values, and leave the class variable untouched.
        dc = Orange.core.DomainContinuizer()
        dc.class_treatment = Orange.core.DomainContinuizer.Ignore
        dc.continuous_treatment = Orange.core.DomainContinuizer.NormalizeBySpan
        dc.multinomial_treatment = Orange.core.DomainContinuizer.NValues
        newdomain = dc(data)
        return data.translate(newdomain)
402
# Expose the old camelCase member names on SVMLearner for backward
# compatibility with pre-rename client code.
SVMLearner = Orange.misc.deprecated_members({
    "learnClassifier": "learn_classifier",
    "tuneParameters": "tune_parameters",
    "kernelFunc" : "kernel_func",
    },
    wrap_methods=["__init__", "tune_parameters"])(SVMLearner)
409
class SVMClassifierWrapper(Orange.core.SVMClassifier):
    """Wrap a trained SVM classifier so that incoming examples are first
    translated into the (normalized) domain the classifier was trained on.
    """

    def __new__(cls, wrapped):
        return Orange.core.SVMClassifier.__new__(cls, name=wrapped.name)

    def __init__(self, wrapped):
        self.wrapped = wrapped
        # Mirror the wrapped classifier's instance attributes on the wrapper.
        self.__dict__.update(wrapped.__dict__)

    def _translate(self, example):
        # Convert the example into the wrapped classifier's domain.
        return Orange.data.Instance(self.wrapped.domain, example)

    def __call__(self, example, what=Orange.core.GetValue):
        return self.wrapped(self._translate(example), what)

    def class_distribution(self, example):
        return self.wrapped.class_distribution(self._translate(example))

    def get_decision_values(self, example):
        return self.wrapped.get_decision_values(self._translate(example))

    def get_model(self):
        return self.wrapped.get_model()

    def __reduce__(self):
        # Pickle only the attributes that belong to the wrapper itself; the
        # wrapped classifier carries (and restores) its own attributes.
        own_state = dict((name, val) for name, val in self.__dict__.items()
                         if name not in self.wrapped.__dict__)
        return SVMClassifierWrapper, (self.wrapped,), own_state
438
# Expose the old camelCase method names on the wrapper for backward
# compatibility with pre-rename client code.
SVMClassifierWrapper = Orange.misc.deprecated_members({
    "classDistribution": "class_distribution",
    "getDecisionValues": "get_decision_values",
    "getModel" : "get_model",
    })(SVMClassifierWrapper)
444
class SVMLearnerSparse(SVMLearner):

    """A :class:`SVMLearner` that learns from
    meta attributes.

    Meta attributes do not need to be registered with the data set domain, or
    present in all the instances. Use this for large
    sparse data sets.

    """

    @Orange.misc.deprecated_keywords({"useNonMeta": "use_non_meta"})
    def __init__(self, **kwds):
        SVMLearner.__init__(self, **kwds)
        # When True, the regular (non-meta) attributes are normalized too.
        self.use_non_meta = kwds.get("use_non_meta", False)
        self.learner = Orange.core.SVMLearnerSparse(**kwds)

    def _normalize(self, data):
        # Sparse data is passed through untouched unless the regular
        # attributes are in use as well.
        if not self.use_non_meta:
            return data
        continuizer = Orange.core.DomainContinuizer()
        continuizer.class_treatment = Orange.core.DomainContinuizer.Ignore
        continuizer.continuous_treatment = \
            Orange.core.DomainContinuizer.NormalizeBySpan
        continuizer.multinomial_treatment = Orange.core.DomainContinuizer.NValues
        return data.translate(continuizer(data))
471
class SVMLearnerEasy(SVMLearner):

    """Apart from the functionality of :obj:`SVMLearner` it automatically
    scales the data and performs parameter optimization with
    :func:`SVMLearner.tune_parameters`. It is similar to the easy.py script
    in the LibSVM package.

    """

    def __init__(self, **kwds):
        # Tuning configuration; set before the base __init__ so values in
        # kwds can override them through the base class's setattr loop.
        self.folds = 4
        self.verbose = 0
        SVMLearner.__init__(self, **kwds)
        # The inner learner is the one actually tuned and trained; `self`
        # only carries the configuration.
        self.learner = SVMLearner(**kwds)

    def learn_classifier(self, data):
        import orngWrap

        # Scale continuous features by span, expand multinomial features to
        # N indicator values and leave the class untouched (snake_case
        # attribute names, consistent with SVMLearner._normalize).
        transformer = Orange.core.DomainContinuizer()
        transformer.multinomial_treatment = Orange.core.DomainContinuizer.NValues
        transformer.continuous_treatment = \
            Orange.core.DomainContinuizer.NormalizeBySpan
        transformer.class_treatment = Orange.core.DomainContinuizer.Ignore
        newdomain = transformer(data)
        newexamples = data.translate(newdomain)

        parameters = []
        self.learner.normalization = False  # Normalization already done

        if self.svm_type in [1, 4]:  # Nu_SVC / Nu_SVR: tune nu
            if self.svm_type == SVMLearner.Nu_SVC:
                # Candidate nu values must stay strictly below the bound.
                max_nu = max(self.max_nu(newexamples) - 1e-7, 0.0)
            else:
                max_nu = 1.0
            parameters.append(("nu", [i / 10.0 for i in range(1, 9) \
                                      if i / 10.0 < max_nu] + [max_nu]))
        else:  # C_SVC / Epsilon_SVR: tune C on a log2 grid
            parameters.append(("C", [2 ** a for a in  range(-5, 15, 2)]))
        if self.kernel_type == 2:  # RBF kernel: tune gamma as well
            parameters.append(("gamma", [2 ** a for a in range(-5, 5, 2)] + [0]))
        tunedLearner = orngWrap.TuneMParameters(object=self.learner,
                                                parameters=parameters,
                                                folds=self.folds)

        return SVMClassifierWrapper(tunedLearner(newexamples,
                                                 verbose=self.verbose))
519
class SVMLearnerSparseClassEasy(SVMLearnerEasy, SVMLearnerSparse):
    # Combines SVMLearnerEasy's scaling/tuning with SVMLearnerSparse's
    # sparse-data handling.  SVMLearnerSparse.__init__ is called explicitly
    # (not via the MRO) so the underlying learner is the sparse one.
    def __init__(self, **kwds):
        SVMLearnerSparse.__init__(self, **kwds)
523
def default_preprocessor():
    """Return the default preprocessor (imputation followed by
    continuization with ordinal treatment of multinomial values) used by
    the Orange.core.LinearLearner based learners.
    """
    return Preprocessor_preprocessorList(preprocessors=[
        Preprocessor_impute(),
        Preprocessor_continuize(
            multinomialTreatment=DomainContinuizer.AsOrdinal),
    ])
533
class LinearSVMLearner(Orange.core.LinearLearner):
    """Train a linear SVM model (from the `LIBLINEAR`_ library)."""

    # Solver type constants re-exported from Orange.core.LinearLearner.
    L2R_L2LOSS_DUAL = Orange.core.LinearLearner.L2R_L2Loss_SVC_Dual
    L2R_L2LOSS = Orange.core.LinearLearner.L2R_L2Loss_SVC
    # BUG FIX: L2R_L1LOSS_DUAL was previously assigned twice; the second
    # (erroneous) assignment overwrote it with the L2-loss dual solver.
    L2R_L1LOSS_DUAL = Orange.core.LinearLearner.L2R_L1Loss_SVC_Dual
    L1R_L2LOSS = Orange.core.LinearLearner.L1R_L2Loss_SVC

    __new__ = _orange__new__(base=Orange.core.LinearLearner)

    def __init__(self, solver_type=L2R_L2LOSS_DUAL, C=1.0, eps=0.01, **kwargs):
        """
        :param solver_type: Can be one of class constants:

            - L2R_L2LOSS_DUAL
            - L2R_L2LOSS
            - L2R_L1LOSS_DUAL
            - L1R_L2LOSS

        :param C: Regularization parameter (default 1.0)
        :type C: float

        :param eps: Stopping criteria (default 0.01)
        :type eps: float

        """
        self.solver_type = solver_type
        self.eps = eps
        self.C = C
        for name, val in kwargs.items():
            setattr(self, name, val)
        if self.solver_type not in [self.L2R_L2LOSS_DUAL, self.L2R_L2LOSS,
                self.L2R_L1LOSS_DUAL, self.L1R_L2LOSS]:
            # Validation is intentionally a no-op (preserves historical
            # behavior); an invalid solver fails in the underlying library.
            pass

        self.preproc = default_preprocessor()

    def __call__(self, instances, weight_id=None):
        """Impute and continuize ``instances``, then train a linear model."""
        instances = self.preproc(instances)
        classifier = super(LinearSVMLearner, self).__call__(instances, weight_id)
        return classifier

# Backward-compatible alias.
LinearLearner = LinearSVMLearner
580
class MultiClassSVMLearner(Orange.core.LinearLearner):
    """ Multi-class SVM (Crammer and Singer) from the `LIBLINEAR`_ library.
    """
    __new__ = _orange__new__(base=Orange.core.LinearLearner)

    def __init__(self, C=1.0, eps=0.01, **kwargs):
        """\
        :param C: Regularization parameter (default 1.0)
        :type C: float

        :param eps: Stopping criteria (default 0.01)
        :type eps: float

        """
        self.C = C
        self.eps = eps
        for attr_name, attr_value in kwargs.items():
            setattr(self, attr_name, attr_value)

        # The solver is fixed to the Crammer-Singer multi-class formulation.
        self.solver_type = self.MCSVM_CS
        self.preproc = default_preprocessor()

    def __call__(self, instances, weight_id=None):
        # Impute/continuize first, then delegate training to the C++ layer.
        preprocessed = self.preproc(instances)
        return super(MultiClassSVMLearner, self).__call__(preprocessed,
                                                          weight_id)
607
608#TODO: Unified way to get attr weights for linear SVMs.
609
def get_linear_svm_weights(classifier, sum=True):
    """Extract attribute weights from the linear SVM classifier.

    For multi class classification the weights are square-summed over all
    binary one vs. one classifiers unless :obj:`sum` is False, in which case
    the return value is a list of weights for each individual binary
    classifier (in the order of [class1 vs class2, class1 vs class3 ... class2
    vs class3 ...]).

    """
    # NOTE: the `sum` parameter shadows the builtin; kept for backward
    # compatibility of the public signature.

    def update_weights(w, key, val, mul):
        # Accumulate mul * val into w[key].
        if key in w:
            w[key] += mul * val
        else:
            w[key] = mul * val

    def to_float(val):
        # Missing (special) values contribute 0 to the weight.
        return float(val) if not val.isSpecial() else 0.0

    SVs = classifier.support_vectors
    weights = []

    class_var = SVs.domain.class_var
    if classifier.svm_type in [SVMLearner.C_SVC, SVMLearner.Nu_SVC]:
        classes = class_var.values
    else:
        classes = [""]
    if len(classes) > 1:
        # Support vectors are stored grouped by class; nSV gives the group
        # sizes, from which the (start, end) index ranges are built.
        sv_ranges = [(0, classifier.nSV[0])]
        for n in classifier.nSV[1:]:
            sv_ranges.append((sv_ranges[-1][1], sv_ranges[-1][1] + n))
    else:
        sv_ranges = [(0, len(SVs))]

    # One weight dict per one-vs-one binary classifier (i vs j).
    for i in range(len(classes) - 1):
        for j in range(i + 1, len(classes)):
            w = {}
            # Dual coefficients for class i's support vectors against class j.
            coef_ind = j - 1
            for sv_ind in range(*sv_ranges[i]):
                attributes = SVs.domain.attributes + \
                SVs[sv_ind].getmetas(False, Orange.feature.Descriptor).keys()
                for attr in attributes:
                    if attr.varType == Orange.feature.Type.Continuous:
                        update_weights(w, attr, to_float(SVs[sv_ind][attr]), \
                                       classifier.coef[coef_ind][sv_ind])
            # And class j's support vectors against class i.
            coef_ind = i
            for sv_ind in range(*sv_ranges[j]):
                attributes = SVs.domain.attributes + \
                SVs[sv_ind].getmetas(False, Orange.feature.Descriptor).keys()
                for attr in attributes:
                    if attr.varType == Orange.feature.Type.Continuous:
                        update_weights(w, attr, to_float(SVs[sv_ind][attr]), \
                                       classifier.coef[coef_ind][sv_ind])
            weights.append(w)

    if sum:
        # Root of the squared sum over all binary classifiers, per attribute.
        scores = defaultdict(float)

        for w in weights:
            for attr, w_attr in w.items():
                scores[attr] += w_attr ** 2
        for key in scores:
            scores[key] = math.sqrt(scores[key])
        return scores
    else:
        return weights

# Backward-compatible alias.
getLinearSVMWeights = get_linear_svm_weights
679
def example_weighted_sum(example, weights):
    """Return the weighted sum of the example's attribute values, using the
    attribute -> weight mapping in ``weights``.
    """
    return sum(float(example[attr]) * w for attr, w in weights.items())

# Backward-compatible alias.
exampleWeightedSum = example_weighted_sum
687
class ScoreSVMWeights(Orange.feature.scoring.Score):
    """Score feature by training a linear SVM classifier, using a squared sum of
    weights (of each binary classifier) as the returned score.

    Example:

        >>> score = Orange.classification.svm.ScoreSVMWeights()
        >>> for feature in table.domain.features:
        ...     print "%15s: %.3f" % (feature.name, score(feature, table))
            compactness: 0.019
            circularity: 0.026
        distance circularity: 0.007
           radius ratio: 0.010
        pr.axis aspect ratio: 0.076
        max.length aspect ratio: 0.010
          scatter ratio: 0.046
          elongatedness: 0.094
        pr.axis rectangularity: 0.006
        max.length rectangularity: 0.031
        scaled variance along major axis: 0.001
        scaled variance along minor axis: 0.000
        scaled radius of gyration: 0.002
        skewness about major axis: 0.004
        skewness about minor axis: 0.003
        kurtosis about minor axis: 0.001
        kurtosis about major axis: 0.060
          hollows ratio: 0.028

    """

    def __new__(cls, attr=None, data=None, weight_id=None, **kwargs):
        # Follows Orange's scoring convention: when constructed with both an
        # attribute and data, return the score immediately instead of an
        # instance of the scorer.
        self = Orange.feature.scoring.Score.__new__(cls, **kwargs)
        if data is not None and attr is not None:
            self.__init__(**kwargs)
            return self.__call__(attr, data, weight_id)
        else:
            return self

    def __reduce__(self):
        # Pickle support: reconstruct from the instance __dict__ alone.
        return ScoreSVMWeights, (), dict(self.__dict__)

    def __init__(self, learner=None, **kwargs):
        """
        :param learner: Learner used for weight estimation
            (default LinearSVMLearner(solver_type=L2Loss_SVM_Dual))
        :type learner: Orange.core.LinearLearner

        """
        if learner:
            self.learner = learner
        else:
            self.learner = LinearSVMLearner(solver_type=
                                    LinearSVMLearner.L2R_L2LOSS_DUAL)

        # Identity-based cache of the last scored table, so scoring every
        # attribute of one table trains the learner only once.
        self._cached_examples = None

    def __call__(self, attr, data, weight_id=None):
        if data is self._cached_examples:
            weights = self._cached_weights
        else:
            # Train once, then derive per-attribute scores as the squared
            # weight vectors summed over all classes.
            classifier = self.learner(data, weight_id)
            self._cached_examples = data
            import numpy
            weights = numpy.array(classifier.weights)
            weights = numpy.sum(weights ** 2, axis=0)
            weights = dict(zip(data.domain.attributes, weights))
            self._cached_weights = weights
        # Attributes absent from the trained domain score 0.
        return weights.get(attr, 0.0)

# Backward-compatible alias.
MeasureAttribute_SVMWeights = ScoreSVMWeights
758
class RFE(object):

    """Recursive feature elimination using linear SVM derived attribute
    weights.

    Example::

        import Orange
        table = Orange.data.Table("vehicle.tab")
        l = Orange.classification.svm.SVMLearner(
            kernel_type=Orange.classification.svm.kernels.Linear,
            normalization=False) # normalization=False will not change the domain
        rfe = Orange.classification.svm.RFE(l)
        data_with_removed_features = rfe(table, 5)

    """

    def __init__(self, learner=None):
        # Defaults to a linear-kernel SVM without normalization so the
        # trained domain matches the input domain.
        self.learner = learner or SVMLearner(kernel_type=
                            kernels.Linear, normalization=False)

    @Orange.misc.deprecated_keywords({"progressCallback": "progress_callback", "stopAt": "stop_at" })
    def get_attr_scores(self, data, stop_at=0, progress_callback=None):
        """Return a dictionary mapping attributes to scores.
        A score is a step number at which the attribute
        was removed from the recursive evaluation.

        """
        # NOTE: `iter` shadows the builtin; kept as-is.
        iter = 1
        attrs = data.domain.attributes
        attrScores = {}

        while len(attrs) > stop_at:
            # Per-binary-classifier weights; squared and summed below.
            weights = get_linear_svm_weights(self.learner(data), sum=False)
            if progress_callback:
                progress_callback(100. * iter / (len(attrs) - stop_at))
            score = dict.fromkeys(attrs, 0)
            for w in weights:
                for attr, wAttr in w.items():
                    score[attr] += wAttr ** 2
            # Sort ascending by score (Python 2 cmp-style sort) and drop the
            # lowest-scoring chunk; removed attributes get increasing ranks.
            score = score.items()
            score.sort(lambda a, b:cmp(a[1], b[1]))
            numToRemove = max(int(len(attrs) * 1.0 / (iter + 1)), 1)
            for attr, s in  score[:numToRemove]:
                attrScores[attr] = len(attrScores)
            attrs = [attr for attr, s in score[numToRemove:]]
            if attrs:
                data = data.select(attrs + [data.domain.classVar])
            iter += 1
        return attrScores

    @Orange.misc.deprecated_keywords({"numSelected": "num_selected", "progressCallback": "progress_callback"})
    def __call__(self, data, num_selected=20, progress_callback=None):
        """Return a new dataset with only `num_selected` best scoring attributes

        :param data: Data
        :type data: Orange.data.Table
        :param num_selected: number of features to preserve
        :type num_selected: int

        """
        scores = self.get_attr_scores(data, progress_callback=progress_callback)
        # Keep the attributes removed last (highest step numbers), in their
        # original domain order.
        scores = sorted(scores.items(), key=lambda item: item[1])

        scores = dict(scores[-num_selected:])
        attrs = [attr for attr in data.domain.attributes if attr in scores]
        domain = Orange.data.Domain(attrs, data.domain.classVar)
        domain.addmetas(data.domain.getmetas())
        data = Orange.data.Table(domain, data)
        return data
829
# Expose the old camelCase name and keyword arguments for backward
# compatibility with pre-rename client code.
RFE = Orange.misc.deprecated_members({
    "getAttrScores": "get_attr_scores"},
    wrap_methods=["get_attr_scores", "__call__"])(RFE)
833
def example_table_to_svm_format(table, file):
    # Deprecated shim; delegates to table_to_svm_format after warning.
    warnings.warn("Deprecated. Use table_to_svm_format", DeprecationWarning)
    table_to_svm_format(table, file)

# Backward-compatible alias.
exampleTableToSVMFormat = example_table_to_svm_format
839
def table_to_svm_format(data, file):
    """Save :obj:`Orange.data.Table` to a format used by LibSVM.

    :param data: Data
    :type data: Orange.data.Table
    :param file: file pointer
    :type file: file

    """
    # Only continuous and discrete attributes (regular and meta) can be
    # represented in the LibSVM sparse format.
    exportable_types = [Orange.feature.Type.Continuous,
                        Orange.feature.Type.Discrete]
    attrs = [attr for attr in
             data.domain.attributes + data.domain.getmetas().values()
             if attr.varType in exportable_types]
    class_var = data.domain.classVar

    for inst in data:
        # Class value first: integer index for discrete, float otherwise.
        if class_var.varType == Orange.feature.Type.Discrete:
            file.write(str(int(inst[class_var])))
        else:
            file.write(str(float(inst[class_var])))

        # Then 1-based "index:value" pairs, skipping missing values.
        for position, attr in enumerate(attrs):
            if not inst[attr].isSpecial():
                file.write(" " + str(position + 1) + ":" +
                           str(float(inst[attr])))
        file.write("\n")

# Backward-compatible alias.
tableToSVMFormat = table_to_svm_format
Note: See TracBrowser for help on using the repository browser.