source: orange/Orange/classification/svm/__init__.py @ 10133:4260174871fb

Revision 10133:4260174871fb, 31.3 KB checked in by Jure Zbontar <jure.zbontar@…>, 2 years ago (diff)

Fixed doc bug in svm

Line 
1"""
2.. index:: support vector machines (SVM)
3.. index:
4   single: classification; support vector machines (SVM)
5   
6*********************************
7Support Vector Machines (``svm``)
8*********************************
9
10This is a module for `Support Vector Machine`_ (SVM) classification. It
exposes the underlying `LibSVM`_ and `LIBLINEAR`_ libraries in a standard
Orange Learner/Classifier interface.
13
14Choosing the right learner
15==========================
16
17Choose an SVM learner suitable for the problem.
18:obj:`SVMLearner` is a general SVM learner. :obj:`SVMLearnerEasy` will
19help with the data normalization and parameter tuning. Learn with a fast
20:obj:`LinearSVMLearner` on data sets with a large number of features.
21
22.. note:: SVM can perform poorly on some data sets. Choose the parameters
23          carefully. In cases of low classification accuracy, try scaling the
24          data and experiment with different parameters. \
25          :obj:`SVMLearnerEasy` class does this automatically (it is similar
26          to the `svm-easy.py` script in the LibSVM distribution).
27
28         
29SVM learners (from `LibSVM`_)
30=============================
31
The most basic :class:`SVMLearner` implements the standard `LibSVM`_ learner.
It supports four built-in kernel types (Linear, Polynomial, RBF and Sigmoid).
34Additionally kernel functions defined in Python can be used instead.
35
36.. note:: For learning from ordinary :class:`Orange.data.Table` use the \
37    :class:`SVMLearner`. For learning from sparse dataset (i.e.
38    data in `basket` format) use the :class:`SVMLearnerSparse` class.
39
40.. autoclass:: Orange.classification.svm.SVMLearner
41    :members:
42
43.. autoclass:: Orange.classification.svm.SVMLearnerSparse
44    :members:
45    :show-inheritance:
46   
47.. autoclass:: Orange.classification.svm.SVMLearnerEasy
48    :members:
49    :show-inheritance:
50
51The next example shows how to use SVM learners and that :obj:`SVMLearnerEasy`
52with automatic data preprocessing and parameter tuning
53outperforms :obj:`SVMLearner` with the default :obj:`~SVMLearner.nu` and :obj:`~SVMLearner.gamma`: 
54   
55.. literalinclude:: code/svm-easy.py
56
57
58   
59Linear SVM learners (from `LIBLINEAR`_)
60=======================================
61
62The :class:`LinearSVMLearner` learner is more suitable for large scale
63problems as it is significantly faster then :class:`SVMLearner` and its
64subclasses. A down side is it only supports a linear kernel (as the name
65suggests) and does not support probability estimation for the
66classifications. Furthermore a Multi-class SVM learner
67:class:`MultiClassSVMLearner` is provided.
68   
69.. autoclass:: Orange.classification.svm.LinearSVMLearner
70   :members:
71   
72.. autoclass:: Orange.classification.svm.MultiClassSVMLearner
73   :members:
74   
75   
76SVM Based feature selection and scoring
77=======================================
78
79.. autoclass:: Orange.classification.svm.RFE
80
81.. autoclass:: Orange.classification.svm.ScoreSVMWeights
82    :show-inheritance:
83 
84 
85Utility functions
86=================
87
88.. automethod:: Orange.classification.svm.max_nu
89
90.. automethod:: Orange.classification.svm.get_linear_svm_weights
91
92.. automethod:: Orange.classification.svm.table_to_svm_format
93
94The following example shows how to get linear SVM weights:
95   
96.. literalinclude:: code/svm-linear-weights.py   
97
98
99.. _kernel-wrapper:
100
101Kernel wrappers
102===============
103
104Kernel wrappers are helper classes used to build custom kernels for use
105with :class:`SVMLearner` and subclasses. All wrapper constructors take
106one or more Python functions (`wrapped` attribute) to wrap. The
107function must be a positive definite kernel, taking two arguments of
108type :class:`Orange.data.Instance` and return a float.
109
110.. autoclass:: Orange.classification.svm.kernels.KernelWrapper
111   :members:
112
113.. autoclass:: Orange.classification.svm.kernels.DualKernelWrapper
114   :members:
115
116.. autoclass:: Orange.classification.svm.kernels.RBFKernelWrapper
117   :members:
118
119.. autoclass:: Orange.classification.svm.kernels.PolyKernelWrapper
120   :members:
121
122.. autoclass:: Orange.classification.svm.kernels.AdditionKernelWrapper
123   :members:
124
125.. autoclass:: Orange.classification.svm.kernels.MultiplicationKernelWrapper
126   :members:
127
128.. autoclass:: Orange.classification.svm.kernels.CompositeKernelWrapper
129   :members:
130
131.. autoclass:: Orange.classification.svm.kernels.SparseLinKernel
132   :members:
133
134Example:
135
136.. literalinclude:: code/svm-custom-kernel.py
137
138.. _`Support Vector Machine`: http://en.wikipedia.org/wiki/Support_vector_machine
139.. _`LibSVM`: http://www.csie.ntu.edu.tw/~cjlin/libsvm/
140.. _`LIBLINEAR`: http://www.csie.ntu.edu.tw/~cjlin/liblinear/
141
142"""
143
144import math
145
146from collections import defaultdict
147
148import Orange.core
149import Orange.data
150import Orange.misc
151import Orange.feature
152
153import kernels
154import warnings
155
156from Orange.core import SVMLearner as _SVMLearner
157from Orange.core import SVMLearnerSparse as _SVMLearnerSparse
158from Orange.core import LinearClassifier, \
159                        LinearLearner, \
160                        SVMClassifier, \
161                        SVMClassifierSparse
162                       
163from Orange.preprocess import Preprocessor_impute, \
164                              Preprocessor_continuize, \
165                              Preprocessor_preprocessorList, \
166                              DomainContinuizer
167
168from Orange import feature as variable
169
170from Orange.misc import _orange__new__
171
def max_nu(data):
    """Return the maximum feasible nu parameter for Nu_SVC support
    vector learning on the given data table.

    :param data: Data with discrete class variable
    :type data: Orange.data.Table

    """
    # nu is bounded above by 2*min(n1,n2)/(n1+n2) for the most
    # unbalanced pair of non-empty classes; 1.0 if no such pair exists.
    counts = list(Orange.core.Distribution(data.domain.classVar, data))
    bound = 1.0
    for idx, first in enumerate(counts):
        for second in counts[idx + 1:]:
            if first != 0 and second != 0:
                pair_bound = 2.0 * min(first, second) / (first + second)
                if pair_bound < bound:
                    bound = pair_bound
    return bound

maxNu = max_nu
190   
class SVMLearner(_SVMLearner):
    """
    :param svm_type: defines the SVM type (can be C_SVC, Nu_SVC
        (default), OneClass, Epsilon_SVR, Nu_SVR)
    :type svm_type: SVMLearner.SVMType
    :param kernel_type: defines the kernel type for learning
        (can be kernels.RBF (default), kernels.Linear, kernels.Polynomial,
        kernels.Sigmoid, kernels.Custom)
    :type kernel_type: SVMLearner.Kernel
    :param degree: kernel parameter (for Polynomial) (default 3)
    :type degree: int
    :param gamma: kernel parameter (Polynomial/RBF/Sigmoid)
        (default 1.0/num_of_features)
    :type gamma: float
    :param coef0: kernel parameter (Polynomial/Sigmoid) (default 0)
    :type coef0: int
    :param kernel_func: function that will be called if `kernel_type` is
        `kernels.Custom`. It must accept two :obj:`Orange.data.Instance`
        arguments and return a float (see :ref:`kernel-wrapper` for some
        examples).
    :type kernel_func: callable function
    :param C: C parameter for C_SVC, Epsilon_SVR and Nu_SVR
    :type C: float
    :param nu: Nu parameter for Nu_SVC, Nu_SVR and OneClass (default 0.5)
    :type nu: float
    :param p: epsilon in loss-function for Epsilon_SVR
    :type p: float
    :param cache_size: cache memory size in MB (default 200)
    :type cache_size: int
    :param eps: tolerance of termination criterion (default 0.001)
    :type eps: float
    :param probability: build a probability model
        (default True)
    :type probability: bool
    :param shrinking: use shrinking heuristics
        (default True)
    :type shrinking: bool
    :param weight: a list of class weights
    :type weight: list

    Example:

        >>> import Orange
        >>> from Orange.classification import svm
        >>> from Orange.evaluation import testing, scoring
        >>> table = Orange.data.Table("vehicle.tab")
        >>> learner = svm.SVMLearner()
        >>> results = testing.cross_validation([learner], table, folds=5)
        >>> print scoring.CA(results)[0]
        0.789613644274

    """
    __new__ = _orange__new__(_SVMLearner)

    # SVM type constants re-exported from the underlying C++ learner.
    C_SVC = _SVMLearner.C_SVC
    Nu_SVC = _SVMLearner.Nu_SVC
    OneClass = _SVMLearner.OneClass
    Nu_SVR = _SVMLearner.Nu_SVR
    Epsilon_SVR = _SVMLearner.Epsilon_SVR

    @Orange.misc.deprecated_keywords({"kernelFunc": "kernel_func"})
    def __init__(self, svm_type=Nu_SVC, kernel_type=kernels.RBF,
                 kernel_func=None, C=1.0, nu=0.5, p=0.1, gamma=0.0, degree=3,
                 coef0=0, shrinking=True, probability=True, verbose=False,
                 cache_size=200, eps=0.001, normalization=True,
                 weight=(), **kwargs):
        self.svm_type = svm_type
        self.kernel_type = kernel_type
        self.kernel_func = kernel_func
        self.C = C
        self.nu = nu
        self.p = p
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.shrinking = shrinking
        self.probability = probability
        self.verbose = verbose
        self.cache_size = cache_size
        self.eps = eps
        self.normalization = normalization
        # Mirror any extra keyword arguments onto this wrapper instance.
        for key, val in kwargs.items():
            setattr(self, key, val)
        # The wrapped C++ learner; the parameters above are copied onto
        # it at call time (see __call__), not here.
        self.learner = Orange.core.SVMLearner(**kwargs)
        self.weight = weight

    max_nu = staticmethod(max_nu)

    def __call__(self, data, weight=0):
        """Construct a SVM classifier

        :param data: data with continuous features
        :type data: Orange.data.Table

        :param weight: unused - use the constructors ``weight``
            parameter to set class weights

        """

        examples = Orange.core.Preprocessor_dropMissingClasses(data)
        class_var = examples.domain.class_var
        if len(examples) == 0:
            raise ValueError("Example table is without any defined classes")

        # Fix the svm_type parameter if we have a class_var/svm_type
        # mismatch: shift classification types (C_SVC/Nu_SVC) to the
        # corresponding regression types (Epsilon_SVR/Nu_SVR) and back.
        if self.svm_type in [0,1] and \
            isinstance(class_var, Orange.feature.Continuous):
            self.svm_type += 3
            #raise AttributeError, "Cannot learn a discrete classifier from non descrete class data. Use EPSILON_SVR or NU_SVR for regression"
        if self.svm_type in [3,4] and \
            isinstance(class_var, Orange.feature.Discrete):
            self.svm_type -= 3
            #raise AttributeError, "Cannot do regression on descrete class data. Use C_SVC or NU_SVC for classification"
        if self.kernel_type == kernels.Custom and not self.kernel_func:
            raise ValueError("Custom kernel function not supplied")

        # NOTE(review): redundant - warnings is already imported at
        # module level; kept for byte-compatibility.
        import warnings

        nu = self.nu
        if self.svm_type == SVMLearner.Nu_SVC: #is nu feasible
            # Clamp nu just below the feasibility bound derived from the
            # class distribution; an infeasible nu makes LibSVM fail.
            max_nu= self.max_nu(examples)
            if self.nu > max_nu:
                if getattr(self, "verbose", 0):
                    warnings.warn("Specified nu %.3f is infeasible. \
                    Setting nu to %.3f" % (self.nu, max_nu))
                nu = max(max_nu - 1e-7, 0.0)

        # Copy the current parameter values onto the wrapped C++ learner.
        for name in ["svm_type", "kernel_type", "kernel_func", "C", "nu", "p",
                     "gamma", "degree", "coef0", "shrinking", "probability",
                     "verbose", "cache_size", "eps"]:
            setattr(self.learner, name, getattr(self, name))
        self.learner.nu = nu
        self.learner.set_weights(self.weight)
        if self.svm_type == SVMLearner.OneClass and self.probability:
            self.learner.probability = False
            warnings.warn("One-class SVM probability output not supported yet.")
        return self.learn_classifier(examples)

    def learn_classifier(self, data):
        # When normalizing, the classifier is wrapped so that examples
        # from the original domain are translated before classification.
        if self.normalization:
            data = self._normalize(data)
            svm = self.learner(data)
            return SVMClassifierWrapper(svm)
        return self.learner(data)

    @Orange.misc.deprecated_keywords({"progressCallback": "progress_callback"})
    def tune_parameters(self, data, parameters=None, folds=5, verbose=0,
                       progress_callback=None):
        """Tune the ``parameters`` on given ``data`` using
        cross validation.

        :param data: data for parameter tuning
        :type data: Orange.data.Table
        :param parameters: defaults to ["nu", "C", "gamma"]
        :type parameters: list of strings
        :param folds: number of folds used for cross validation
        :type folds: int
        :param verbose: default False
        :type verbose: bool
        :param progress_callback: report progress
        :type progress_callback: callback function

        An example that tunes the `gamma` parameter on `data` using 3-fold cross
        validation. ::

            svm = Orange.classification.svm.SVMLearner()
            svm.tune_parameters(table, parameters=["gamma"], folds=3)

        """

        import orngWrap

        if parameters is None:
            parameters = ["nu", "C", "gamma"]

        searchParams = []
        # Normalize once up front and disable normalization during the
        # search so each fold does not re-normalize; restored at the end.
        normalization = self.normalization
        if normalization:
            data = self._normalize(data)
            self.normalization = False
        if self.svm_type in [SVMLearner.Nu_SVC, SVMLearner.Nu_SVR] \
                    and "nu" in parameters:
            numOfNuValues=9  # NOTE(review): unused local, kept verbatim
            if isinstance(data.domain.class_var, variable.Discrete):
                max_nu = max(self.max_nu(data) - 1e-7, 0.0)
            else:
                max_nu = 1.0
            # Candidate nu values: 0.1 .. 0.8 capped by the feasible bound.
            searchParams.append(("nu", [i/10.0 for i in range(1, 9) if \
                                        i/10.0 < max_nu] + [max_nu]))
        elif "C" in parameters:
            searchParams.append(("C", [2**a for a in  range(-5,15,2)]))
        if self.kernel_type==2 and "gamma" in parameters:
            searchParams.append(("gamma", [2**a for a in range(-5,5,2)]+[0]))
        tunedLearner = orngWrap.TuneMParameters(object=self,
                            parameters=searchParams,
                            folds=folds,
                            returnWhat=orngWrap.TuneMParameters.returnLearner,
                            progressCallback=progress_callback
                            if progress_callback else lambda i:None)
        tunedLearner(data, verbose=verbose)
        if normalization:
            self.normalization = normalization

    def _normalize(self, data):
        # Continuize the domain: discrete -> N indicator values,
        # continuous scaled by span, class variable left untouched.
        dc = Orange.core.DomainContinuizer()
        dc.class_treatment = Orange.core.DomainContinuizer.Ignore
        dc.continuous_treatment = Orange.core.DomainContinuizer.NormalizeBySpan
        dc.multinomial_treatment = Orange.core.DomainContinuizer.NValues
        newdomain = dc(data)
        return data.translate(newdomain)

SVMLearner = Orange.misc.deprecated_members({
    "learnClassifier": "learn_classifier",
    "tuneParameters": "tune_parameters",
    "kernelFunc" : "kernel_func",
    },
    wrap_methods=["__init__", "tune_parameters"])(SVMLearner)
408
class SVMClassifierWrapper(Orange.core.SVMClassifier):
    """Delegating wrapper around a trained SVM classifier.

    Each incoming example is translated into the domain the wrapped
    classifier was trained on before being classified (used when the
    learner trained on a normalized/continuized domain).
    """

    def __new__(cls, wrapped):
        return Orange.core.SVMClassifier.__new__(cls, name=wrapped.name)

    def __init__(self, wrapped):
        self.wrapped = wrapped
        # Mirror all of the wrapped classifier's instance attributes.
        self.__dict__.update(wrapped.__dict__)

    def __call__(self, example, what=Orange.core.GetValue):
        translated = Orange.data.Instance(self.wrapped.domain, example)
        return self.wrapped(translated, what)

    def class_distribution(self, example):
        translated = Orange.data.Instance(self.wrapped.domain, example)
        return self.wrapped.class_distribution(translated)

    def get_decision_values(self, example):
        translated = Orange.data.Instance(self.wrapped.domain, example)
        return self.wrapped.get_decision_values(translated)

    def get_model(self):
        return self.wrapped.get_model()

    def __reduce__(self):
        # Pickle only state not already stored on the wrapped classifier;
        # __init__ re-copies the shared attributes on unpickling.
        own_state = dict((key, value)
                         for key, value in self.__dict__.items()
                         if key not in self.wrapped.__dict__)
        return SVMClassifierWrapper, (self.wrapped,), own_state

SVMClassifierWrapper = Orange.misc.deprecated_members({
    "classDistribution": "class_distribution",
    "getDecisionValues": "get_decision_values",
    "getModel" : "get_model",
    })(SVMClassifierWrapper)
443           
class SVMLearnerSparse(SVMLearner):

    """A :class:`SVMLearner` that learns from meta attributes.

    Meta attributes do not need to be registered with the data set
    domain, or present in all the instances. Use this for large sparse
    data sets.

    """

    @Orange.misc.deprecated_keywords({"useNonMeta": "use_non_meta"})
    def __init__(self, **kwds):
        SVMLearner.__init__(self, **kwds)
        # When True, the regular (non-meta) attributes are normalized too.
        self.use_non_meta = kwds.get("use_non_meta", False)
        self.learner = Orange.core.SVMLearnerSparse(**kwds)

    def _normalize(self, data):
        # Sparse meta attributes are never normalized; the regular part
        # of the domain is continuized only on request.
        if not self.use_non_meta:
            return data
        continuizer = Orange.core.DomainContinuizer()
        continuizer.class_treatment = Orange.core.DomainContinuizer.Ignore
        continuizer.continuous_treatment = \
            Orange.core.DomainContinuizer.NormalizeBySpan
        continuizer.multinomial_treatment = Orange.core.DomainContinuizer.NValues
        return data.translate(continuizer(data))
470
class SVMLearnerEasy(SVMLearner):

    """Apart from the functionality of :obj:`SVMLearner` it automatically
    scales the data and performs parameter optimization with
    :func:`SVMLearner.tune_parameters`. It is similar to the `easy.py`
    script in the LibSVM package.

    """

    def __init__(self, **kwds):
        # Number of cross-validation folds used while tuning parameters.
        self.folds = 4
        self.verbose = 0
        SVMLearner.__init__(self, **kwds)
        # Delegate learner whose parameters are tuned on the scaled data.
        self.learner = SVMLearner(**kwds)

    def learn_classifier(self, data):
        """Scale `data`, tune the SVM parameters by internal
        cross-validation and return the resulting (wrapped) classifier.
        """
        # Continuize/normalize the data once up front.
        transformer = Orange.core.DomainContinuizer()
        transformer.multinomialTreatment = Orange.core.DomainContinuizer.NValues
        transformer.continuousTreatment = \
            Orange.core.DomainContinuizer.NormalizeBySpan
        transformer.classTreatment = Orange.core.DomainContinuizer.Ignore
        newdomain = transformer(data)
        newexamples = data.translate(newdomain)

        parameters = []
        self.learner.normalization = False  # normalization already done

        if self.svm_type in [SVMLearner.Nu_SVC, SVMLearner.Nu_SVR]:
            if self.svm_type == SVMLearner.Nu_SVC:
                # nu is only feasible up to a bound determined by the
                # class distribution (see max_nu).
                max_nu = max(self.max_nu(newexamples) - 1e-7, 0.0)
            else:
                max_nu = 1.0
            parameters.append(("nu", [i / 10.0 for i in range(1, 9)
                                      if i / 10.0 < max_nu] + [max_nu]))
        else:
            parameters.append(("C", [2 ** a for a in range(-5, 15, 2)]))
        if self.kernel_type == 2:  # RBF kernel: tune gamma as well
            parameters.append(("gamma", [2 ** a for a in range(-5, 5, 2)] + [0]))
        import orngWrap
        tunedLearner = orngWrap.TuneMParameters(object=self.learner,
                                                parameters=parameters,
                                                folds=self.folds)

        return SVMClassifierWrapper(tunedLearner(newexamples,
                                                 verbose=self.verbose))
518
class SVMLearnerSparseClassEasy(SVMLearnerEasy, SVMLearnerSparse):
    """Sparse-data variant of :class:`SVMLearnerEasy`: automatic scaling
    and parameter tuning combined with learning from meta attributes
    (see :class:`SVMLearnerSparse`).
    """
    def __init__(self, **kwds):
        SVMLearnerSparse.__init__(self, **kwds)
522
def default_preprocessor():
    """Return the default preprocessing pipeline used by the
    Orange.core.LinearLearner based learners: imputation followed by
    continuization (multinomial features treated as ordinal).
    """
    steps = [
        Preprocessor_impute(),
        Preprocessor_continuize(
            multinomialTreatment=DomainContinuizer.AsOrdinal),
    ]
    return Preprocessor_preprocessorList(preprocessors=steps)
532
class LinearSVMLearner(Orange.core.LinearLearner):
    """Train a linear SVM model using the `LIBLINEAR`_ solvers."""

    # Solver type constants re-exported from Orange.core.LinearLearner.
    # BUG FIX: L2R_L1LOSS_DUAL was previously assigned twice, the second
    # (duplicated) line re-binding it to L2R_L2Loss_SVC_Dual, so asking
    # for the L1-loss dual solver silently trained an L2-loss model.
    L2R_L2LOSS_DUAL = Orange.core.LinearLearner.L2R_L2Loss_SVC_Dual
    L2R_L2LOSS = Orange.core.LinearLearner.L2R_L2Loss_SVC
    L2R_L1LOSS_DUAL = Orange.core.LinearLearner.L2R_L1Loss_SVC_Dual
    L1R_L2LOSS = Orange.core.LinearLearner.L1R_L2Loss_SVC

    __new__ = _orange__new__(base=Orange.core.LinearLearner)

    def __init__(self, solver_type=L2R_L2LOSS_DUAL, C=1.0, eps=0.01, **kwargs):
        """
        :param solver_type: Can be one of class constants:

            - L2R_L2LOSS_DUAL
            - L2R_L2LOSS
            - L2R_L1LOSS_DUAL
            - L1R_L2LOSS

        :param C: Regularization parameter (default 1.0)
        :type C: float

        :param eps: Stopping criteria (default 0.01)
        :type eps: float

        """
        self.solver_type = solver_type
        self.eps = eps
        self.C = C
        for name, val in kwargs.items():
            setattr(self, name, val)
        if self.solver_type not in [self.L2R_L2LOSS_DUAL, self.L2R_L2LOSS,
                self.L2R_L1LOSS_DUAL, self.L1R_L2LOSS]:
            # Validation intentionally disabled for backward compatibility
            # with callers passing raw LIBLINEAR solver constants.
            pass

        # Default preprocessing: impute missing values and continuize
        # discrete features (the linear solvers need numeric data).
        self.preproc = default_preprocessor()

    def __call__(self, instances, weight_id=None):
        """Preprocess `instances` and fit a linear model on them."""
        instances = self.preproc(instances)
        classifier = super(LinearSVMLearner, self).__call__(instances, weight_id)
        return classifier

LinearLearner = LinearSVMLearner
579
class MultiClassSVMLearner(Orange.core.LinearLearner):
    """ Multi-class SVM (Crammer and Singer) from the `LIBLINEAR`_ library.
    """
    __new__ = _orange__new__(base=Orange.core.LinearLearner)

    def __init__(self, C=1.0, eps=0.01, **kwargs):
        """\
        :param C: Regularization parameter (default 1.0)
        :type C: float

        :param eps: Stopping criteria (default 0.01)
        :type eps: float

        """
        self.C = C
        self.eps = eps
        for option, value in kwargs.items():
            setattr(self, option, value)

        # Always use the Crammer-Singer multi-class formulation; this is
        # set after the kwargs loop so it cannot be overridden.
        self.solver_type = self.MCSVM_CS
        self.preproc = default_preprocessor()

    def __call__(self, instances, weight_id=None):
        """Preprocess `instances` and train the multi-class model."""
        prepared = self.preproc(instances)
        return super(MultiClassSVMLearner, self).__call__(prepared, weight_id)
606
607#TODO: Unified way to get attr weights for linear SVMs.
608
def get_linear_svm_weights(classifier, sum=True):
    """Extract attribute weights from the linear SVM classifier.

    For multi class classification the weights are square-summed over all
    binary one vs. one classifiers unless :obj:`sum` is False, in which case
    the return value is a list of weights for each individual binary
    classifier (in the order of [class1 vs class2, class1 vs class3 ... class2
    vs class3 ...]).

    """

    def update_weights(w, key, val, mul):
        # Accumulate mul * val into w[key].
        if key in w:
            w[key]+=mul*val
        else:
            w[key]=mul*val

    def to_float(val):
        # Missing (special) values contribute nothing to the weight.
        return float(val) if not val.isSpecial() else 0.0

    SVs=classifier.support_vectors
    weights = []

    class_var = SVs.domain.class_var
    if classifier.svm_type in [SVMLearner.C_SVC, SVMLearner.Nu_SVC]:
        classes = class_var.values
    else:
        # Regression/one-class models: treat as a single pseudo-class.
        classes = [""]
    if len(classes) > 1:
        # Support vectors are stored consecutively per class; build each
        # class's (start, end) index range from the nSV counts.
        sv_ranges = [(0, classifier.nSV[0])]
        for n in classifier.nSV[1:]:
            sv_ranges.append((sv_ranges[-1][1], sv_ranges[-1][1]+n))
    else:
        sv_ranges = [(0, len(SVs))]

    # One weight dict per one-vs-one pair (i, j), accumulated as the
    # coefficient-weighted sum of both classes' support vectors.
    for i in range(len(classes) - 1):
        for j in range(i+1, len(classes)):
            w = {}
            coef_ind = j - 1
            for sv_ind in range(*sv_ranges[i]):
                # Include both regular and continuous meta attributes.
                attributes = SVs.domain.attributes + \
                SVs[sv_ind].getmetas(False, Orange.feature.Descriptor).keys()
                for attr in attributes:
                    if attr.varType == Orange.feature.Type.Continuous:
                        update_weights(w, attr, to_float(SVs[sv_ind][attr]), \
                                       classifier.coef[coef_ind][sv_ind])
            coef_ind=i
            for sv_ind in range(*sv_ranges[j]):
                attributes = SVs.domain.attributes + \
                SVs[sv_ind].getmetas(False, Orange.feature.Descriptor).keys()
                for attr in attributes:
                    if attr.varType==Orange.feature.Type.Continuous:
                        update_weights(w, attr, to_float(SVs[sv_ind][attr]), \
                                       classifier.coef[coef_ind][sv_ind])
            weights.append(w)

    if sum:
        # Square-sum over all binary classifiers, then take the root.
        scores = defaultdict(float)

        for w in weights:
            for attr, w_attr in w.items():
                scores[attr] += w_attr**2
        for key in scores:
            scores[key] = math.sqrt(scores[key])
        return scores
    else:
        return weights

getLinearSVMWeights = get_linear_svm_weights
678
def example_weighted_sum(example, weights):
    """Return the weighted sum of the example's attribute values.

    :param example: an instance (or mapping) indexable by the attributes
        appearing as keys in ``weights``
    :param weights: mapping of attribute -> weight, e.g. as returned by
        :func:`get_linear_svm_weights`
    :rtype: float (0 for empty ``weights``)
    """
    # Use the builtin sum instead of accumulating into a local named
    # `sum`, which shadowed the builtin in the original implementation.
    return sum(float(example[attr]) * w for attr, w in weights.items())

exampleWeightedSum = example_weighted_sum
686
class ScoreSVMWeights(Orange.feature.scoring.Score):
    """Score feature by training a linear SVM classifier, using a squared sum of
    weights (of each binary classifier) as the returned score.

    Example:

        >>> score = Orange.classification.svm.ScoreSVMWeights()
        >>> for feature in table.domain.features:
        ...     print "%15s: %.3f" % (feature.name, score(feature, table))
            compactness: 0.019
            circularity: 0.026
        distance circularity: 0.007
           radius ratio: 0.010
        pr.axis aspect ratio: 0.076
        max.length aspect ratio: 0.010
          scatter ratio: 0.046
          elongatedness: 0.094
        pr.axis rectangularity: 0.006
        max.length rectangularity: 0.031
        scaled variance along major axis: 0.001
        scaled variance along minor axis: 0.000
        scaled radius of gyration: 0.002
        skewness about major axis: 0.004
        skewness about minor axis: 0.003
        kurtosis about minor axis: 0.001
        kurtosis about major axis: 0.060
          hollows ratio: 0.028

    """

    def __new__(cls, attr=None, data=None, weight_id=None, **kwargs):
        # Orange scoring convention: constructing with both attr and data
        # immediately computes and returns the score, not an instance.
        self = Orange.feature.scoring.Score.__new__(cls, **kwargs)
        if data is not None and attr is not None:
            self.__init__(**kwargs)
            return self.__call__(attr, data, weight_id)
        else:
            return self

    def __reduce__(self):
        # Support pickling of the scorer together with its cached state.
        return ScoreSVMWeights, (), dict(self.__dict__)

    def __init__(self, learner=None, **kwargs):
        """
        :param learner: Learner used for weight estimation
            (default LinearSVMLearner(solver_type=L2Loss_SVM_Dual))
        :type learner: Orange.core.LinearLearner

        """
        if learner:
            self.learner = learner
        else:
            self.learner = LinearSVMLearner(solver_type=
                                    LinearSVMLearner.L2R_L2LOSS_DUAL)

        # Identity-keyed cache of the last scored table, so scoring every
        # feature of one table trains the SVM only once.
        self._cached_examples = None

    def __call__(self, attr, data, weight_id=None):
        if data is self._cached_examples:
            weights = self._cached_weights
        else:
            classifier = self.learner(data, weight_id)
            self._cached_examples = data
            import numpy
            # Square-sum the per-class weight vectors into one score
            # per attribute.
            weights = numpy.array(classifier.weights)
            weights = numpy.sum(weights ** 2, axis=0)
            weights = dict(zip(data.domain.attributes, weights))
            self._cached_weights = weights
        return weights.get(attr, 0.0)

MeasureAttribute_SVMWeights = ScoreSVMWeights
757
class RFE(object):

    """Recursive feature elimination using linear SVM derived attribute
    weights.

    Example::

        import Orange
        table = Orange.data.Table("vehicle.tab")
        l = Orange.classification.svm.SVMLearner(
            kernel_type=Orange.classification.svm.kernels.Linear,
            normalization=False) # normalization=False will not change the domain
        rfe = Orange.classification.svm.RFE(l)
        data_with_removed_features = rfe(table, 5)

    """

    def __init__(self, learner=None):
        # The learner must yield a classifier accepted by
        # get_linear_svm_weights, i.e. a linear-kernel SVM.
        self.learner = learner or SVMLearner(kernel_type=
                            kernels.Linear, normalization=False)

    @Orange.misc.deprecated_keywords({"progressCallback": "progress_callback", "stopAt": "stop_at" })
    def get_attr_scores(self, data, stop_at=0, progress_callback=None):
        """Return a dictionary mapping attributes to scores.
        A score is a step number at which the attribute
        was removed from the recursive evaluation.

        """
        iter = 1  # NOTE(review): shadows the builtin `iter`
        attrs = data.domain.attributes
        attrScores = {}

        while len(attrs) > stop_at:
            # Per-attribute squared weights summed over all binary
            # one-vs-one classifiers of the current model.
            weights = get_linear_svm_weights(self.learner(data), sum=False)
            if progress_callback:
                progress_callback(100. * iter / (len(attrs) - stop_at))
            score = dict.fromkeys(attrs, 0)
            for w in weights:
                for attr, wAttr in w.items():
                    score[attr] += wAttr**2
            score = score.items()
            score.sort(lambda a,b:cmp(a[1],b[1]))
            # Remove roughly 1/(iter+1) of the remaining attributes each
            # round, always at least one (the lowest-weighted ones).
            numToRemove = max(int(len(attrs)*1.0/(iter+1)), 1)
            for attr, s in  score[:numToRemove]:
                attrScores[attr] = len(attrScores)
            attrs = [attr for attr, s in score[numToRemove:]]
            if attrs:
                data = data.select(attrs + [data.domain.classVar])
            iter += 1
        return attrScores

    @Orange.misc.deprecated_keywords({"numSelected": "num_selected", "progressCallback": "progress_callback"})
    def __call__(self, data, num_selected=20, progress_callback=None):
        """Return a new dataset with only `num_selected` best scoring attributes

        :param data: Data
        :type data: Orange.data.Table
        :param num_selected: number of features to preserve
        :type num_selected: int

        """
        scores = self.get_attr_scores(data, progress_callback=progress_callback)
        scores = sorted(scores.items(), key=lambda item: item[1])

        # Higher score == removed later == more important; keep the tail.
        scores = dict(scores[-num_selected:])
        attrs = [attr for attr in data.domain.attributes if attr in scores]
        domain = Orange.data.Domain(attrs, data.domain.classVar)
        domain.addmetas(data.domain.getmetas())
        data = Orange.data.Table(domain, data)
        return data

RFE = Orange.misc.deprecated_members({
    "getAttrScores": "get_attr_scores"},
    wrap_methods=["get_attr_scores", "__call__"])(RFE)
832
def example_table_to_svm_format(table, file):
    """Deprecated alias; use :obj:`table_to_svm_format` instead."""
    warnings.warn("Deprecated. Use table_to_svm_format", DeprecationWarning)
    table_to_svm_format(table, file)

exampleTableToSVMFormat = example_table_to_svm_format
838
def table_to_svm_format(data, file):
    """Save :obj:`Orange.data.Table` to a format used by LibSVM.

    :param data: Data
    :type data: Orange.data.Table
    :param file: file pointer
    :type file: file

    """
    # Only continuous and discrete attributes (regular or meta) can be
    # written; everything else is skipped.
    usable = [Orange.feature.Type.Continuous,
              Orange.feature.Type.Discrete]
    candidates = data.domain.attributes + data.domain.getmetas().values()
    attrs = [attr for attr in candidates if attr.varType in usable]
    cv = data.domain.classVar
    discrete_class = cv.varType == Orange.feature.Type.Discrete

    for ex in data:
        # Label first, then sparse "index:value" pairs (1-based indices);
        # missing values are omitted entirely.
        if discrete_class:
            fields = [str(int(ex[cv]))]
        else:
            fields = [str(float(ex[cv]))]
        for idx, attr in enumerate(attrs):
            if not ex[attr].isSpecial():
                fields.append(str(idx + 1) + ":" + str(float(ex[attr])))
        file.write(" ".join(fields) + "\n")

tableToSVMFormat = table_to_svm_format
Note: See TracBrowser for help on using the repository browser.