source: orange/orange/Orange/classification/svm/__init__.py @ 9349:fa13a2c52fcd

Revision 9349:fa13a2c52fcd, 30.6 KB checked in by mitar, 2 years ago (diff)

Changed way of linking to code in documentation.

Line 
1"""
2.. index:: support vector machines (SVM)
.. index::
   single: classification; support vector machines (SVM)
5   
6*********************************
7Support Vector Machines (``svm``)
8*********************************
9
10This is a module for `Support Vector Machine`_ (SVM) classification. It
11exposes the underlying `LibSVM`_ and `LIBLINEAR`_ library in a standard
12Orange Learner/Classifier interface.
13
14Choosing the right learner
15==========================
16
17Choose an SVM learner suitable for the problem.
18:obj:`SVMLearner` is a general SVM learner. :obj:`SVMLearnerEasy` will
19help with the data normalization and parameter tuning. Learn with a fast
20:obj:`LinearSVMLearner` on data sets with a large number of features.
21
22.. note:: SVM can perform poorly on some data sets. Choose the parameters
23          carefully. In cases of low classification accuracy, try scaling the
24          data and experiment with different parameters. \
25          :obj:`SVMLearnerEasy` class does this automatically (it is similar
26          to the `svm-easy.py` script in the LibSVM distribution).
27
28         
29SVM learners (from `LibSVM`_)
30=============================
31
32The most basic :class:`SVMLearner` implements the standard `LibSVM`_ learner
33It supports four built-in kernel types (Linear, Polynomial, RBF and Sigmoid).
34Additionally kernel functions defined in Python can be used instead.
35
36.. note:: For learning from ordinary :class:`Orange.data.Table` use the \
37    :class:`SVMLearner`. For learning from sparse dataset (i.e.
38    data in `basket` format) use the :class:`SVMLearnerSparse` class.
39
40.. autoclass:: Orange.classification.svm.SVMLearner
41    :members:
42
43.. autoclass:: Orange.classification.svm.SVMLearnerSparse
44    :members:
45    :show-inheritance:
46   
47.. autoclass:: Orange.classification.svm.SVMLearnerEasy
48    :members:
49    :show-inheritance:
50
51The next example shows how to use SVM learners and that :obj:`SVMLearnerEasy`
52with automatic data preprocessing and parameter tuning
53outperforms :obj:`SVMLearner` with the default :obj:`~SVMLearner.nu` and :obj:`~SVMLearner.gamma`: 
54   
55.. literalinclude:: code/svm-easy.py
56
57
58   
59Linear SVM learners (from `LIBLINEAR`_)
60=======================================
61
62The :class:`LinearSVMLearner` learner is more suitable for large scale
problems as it is significantly faster than :class:`SVMLearner` and its
64subclasses. A down side is it only supports a linear kernel (as the name
65suggests) and does not support probability estimation for the
66classifications. Furthermore a Multi-class SVM learner
67:class:`MultiClassSVMLearner` is provided.
68   
69.. autoclass:: Orange.classification.svm.LinearSVMLearner
70   :members:
71   
72.. autoclass:: Orange.classification.svm.MultiClassSVMLearner
73   :members:
74   
75   
76SVM Based feature selection and scoring
77=======================================
78
79.. autoclass:: Orange.classification.svm.RFE
80
81.. autoclass:: Orange.classification.svm.Score_SVMWeights
82    :show-inheritance:
83 
84 
85Utility functions
86=================
87
88.. automethod:: Orange.classification.svm.max_nu
89
90.. automethod:: Orange.classification.svm.get_linear_svm_weights
91
92.. automethod:: Orange.classification.svm.table_to_svm_format
93
94The following example shows how to get linear SVM weights:
95   
96.. literalinclude:: code/svm-linear-weights.py   
97
98
99.. _kernel-wrapper:
100
101Kernel wrappers
102===============
103
104Kernel wrappers are helper classes used to build custom kernels for use
105with :class:`SVMLearner` and subclasses. All wrapper constructors take
106one or more Python functions (`wrapped` attribute) to wrap. The
107function must be a positive definite kernel, taking two arguments of
108type :class:`Orange.data.Instance` and return a float.
109
110.. autoclass:: Orange.classification.svm.kernels.KernelWrapper
111   :members:
112
113.. autoclass:: Orange.classification.svm.kernels.DualKernelWrapper
114   :members:
115
116.. autoclass:: Orange.classification.svm.kernels.RBFKernelWrapper
117   :members:
118
119.. autoclass:: Orange.classification.svm.kernels.PolyKernelWrapper
120   :members:
121
122.. autoclass:: Orange.classification.svm.kernels.AdditionKernelWrapper
123   :members:
124
125.. autoclass:: Orange.classification.svm.kernels.MultiplicationKernelWrapper
126   :members:
127
128.. autoclass:: Orange.classification.svm.kernels.CompositeKernelWrapper
129   :members:
130
131.. autoclass:: Orange.classification.svm.kernels.SparseLinKernel
132   :members:
133
134Example:
135
136.. literalinclude:: code/svm-custom-kernel.py
137
138.. _`Support Vector Machine`: http://en.wikipedia.org/wiki/Support_vector_machine
139.. _`LibSVM`: http://www.csie.ntu.edu.tw/~cjlin/libsvm/
140.. _`LIBLINEAR`: http://www.csie.ntu.edu.tw/~cjlin/liblinear/
141
142"""
143
144import math
145
146from collections import defaultdict
147
148import Orange.core
149import Orange.data
150import Orange.misc
151import Orange.feature
152
153import kernels
154import warnings
155
156from Orange.core import SVMLearner as _SVMLearner
157from Orange.core import SVMLearnerSparse as _SVMLearnerSparse
158from Orange.core import LinearClassifier, \
159                        LinearLearner, \
160                        SVMClassifier, \
161                        SVMClassifierSparse
162                       
163from Orange.preprocess import Preprocessor_impute, \
164                              Preprocessor_continuize, \
165                              Preprocessor_preprocessorList, \
166                              DomainContinuizer
167
168from Orange.data import variable
169
170from Orange.misc import _orange__new__
171
def max_nu(data):
    """Return the maximum nu parameter for Nu_SVC support vector learning
    for the given data table.

    :param data: Data with discrete class variable
    :type data: Orange.data.Table

    """
    # nu can never exceed 1.0; tighten the bound pair by pair.
    best = 1.0
    counts = list(Orange.core.Distribution(data.domain.classVar, data))
    # The feasible nu is limited by the most unbalanced pair of
    # non-empty classes: 2 * min(n1, n2) / (n1 + n2).
    for i, n1 in enumerate(counts):
        for n2 in counts[i + 1:]:
            if n1 != 0 and n2 != 0:
                bound = 2.0 * min(n1, n2) / (n1 + n2)
                if bound < best:
                    best = bound
    return best

maxNu = max_nu
190   
class SVMLearner(_SVMLearner):
    """
    :param svm_type: defines the SVM type (can be C_SVC, Nu_SVC
        (default), OneClass, Epsilon_SVR, Nu_SVR)
    :type svm_type: SVMLearner.SVMType
    :param kernel_type: defines the kernel type for learning
        (can be kernels.RBF (default), kernels.Linear, kernels.Polynomial,
        kernels.Sigmoid, kernels.Custom)
    :type kernel_type: SVMLearner.Kernel
    :param degree: kernel parameter (for Polynomial) (default 3)
    :type degree: int
    :param gamma: kernel parameter (Polynomial/RBF/Sigmoid)
        (default 1.0/num_of_features)
    :type gamma: float
    :param coef0: kernel parameter (Polynomial/Sigmoid) (default 0)
    :type coef0: int
    :param kernel_func: function that will be called if `kernel_type` is
        `kernels.Custom`. It must accept two :obj:`Orange.data.Instance`
        arguments and return a float (see :ref:`kernel-wrapper` for some
        examples).
    :type kernel_func: callable function
    :param C: C parameter for C_SVC, Epsilon_SVR and Nu_SVR
    :type C: float
    :param nu: Nu parameter for Nu_SVC, Nu_SVR and OneClass (default 0.5)
    :type nu: float
    :param p: epsilon in loss-function for Epsilon_SVR
    :type p: float
    :param cache_size: cache memory size in MB (default 200)
    :type cache_size: int
    :param eps: tolerance of termination criterion (default 0.001)
    :type eps: float
    :param probability: build a probability model
        (default True)
    :type probability: bool
    :param shrinking: use shrinking heuristics
        (default True)
    :type shrinking: bool
    :param weight: a list of class weights
    :type weight: list

    Example:

        >>> import Orange
        >>> from Orange.classification import svm
        >>> from Orange.evaluation import testing, scoring
        >>> table = Orange.data.Table("vehicle.tab")
        >>> learner = svm.SVMLearner()
        >>> results = testing.cross_validation([learner], table, folds=5)
        >>> print scoring.CA(results)

    """
    __new__ = _orange__new__(_SVMLearner)

    C_SVC = _SVMLearner.C_SVC
    Nu_SVC = _SVMLearner.Nu_SVC
    OneClass = _SVMLearner.OneClass
    Nu_SVR = _SVMLearner.Nu_SVR
    Epsilon_SVR = _SVMLearner.Epsilon_SVR

    @Orange.misc.deprecated_keywords({"kernelFunc": "kernel_func"})
    def __init__(self, svm_type=Nu_SVC, kernel_type=kernels.RBF,
                 kernel_func=None, C=1.0, nu=0.5, p=0.1, gamma=0.0, degree=3,
                 coef0=0, shrinking=True, probability=True, verbose=False,
                 cache_size=200, eps=0.001, normalization=True,
                 weight=None, **kwargs):
        self.svm_type = svm_type
        self.kernel_type = kernel_type
        self.kernel_func = kernel_func
        self.C = C
        self.nu = nu
        self.p = p
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.shrinking = shrinking
        self.probability = probability
        self.verbose = verbose
        self.cache_size = cache_size
        self.eps = eps
        self.normalization = normalization
        for key, val in kwargs.items():
            setattr(self, key, val)
        self.learner = Orange.core.SVMLearner(**kwargs)
        # weight=None (rather than a mutable [] default) prevents the class
        # weight list from being shared across learner instances.
        self.weight = weight if weight is not None else []

    max_nu = staticmethod(max_nu)

    def __call__(self, data, weight=0):
        """Construct a SVM classifier

        :param table: data with continuous features
        :type table: Orange.data.Table

        :param weight: unused - use the constructors ``weight``
            parameter to set class weights

        """

        examples = Orange.core.Preprocessor_dropMissingClasses(data)
        class_var = examples.domain.class_var
        if len(examples) == 0:
            raise ValueError("Example table is without any defined classes")

        # Fix the svm_type parameter if we have a class_var/svm_type mismatch:
        # silently switch between the classification and regression variants
        # to match the class variable instead of raising an error.
        if self.svm_type in [0, 1] and \
            isinstance(class_var, Orange.data.variable.Continuous):
            self.svm_type += 3
        if self.svm_type in [3, 4] and \
            isinstance(class_var, Orange.data.variable.Discrete):
            self.svm_type -= 3
        if self.kernel_type == kernels.Custom and not self.kernel_func:
            raise ValueError("Custom kernel function not supplied")

        nu = self.nu
        if self.svm_type == SVMLearner.Nu_SVC:  # is nu feasible
            max_nu = self.max_nu(examples)
            if self.nu > max_nu:
                if getattr(self, "verbose", 0):
                    warnings.warn("Specified nu %.3f is infeasible. \
                    Setting nu to %.3f" % (self.nu, max_nu))
                nu = max(max_nu - 1e-7, 0.0)

        # Push all user-visible parameters down to the wrapped C++ learner.
        for name in ["svm_type", "kernel_type", "kernel_func", "C", "nu", "p",
                     "gamma", "degree", "coef0", "shrinking", "probability",
                     "verbose", "cache_size", "eps"]:
            setattr(self.learner, name, getattr(self, name))
        self.learner.nu = nu
        self.learner.set_weights(self.weight)
        if self.svm_type == SVMLearner.OneClass and self.probability:
            self.learner.probability = False
            warnings.warn("One-class SVM probability output not supported yet.")
        return self.learn_classifier(examples)

    def learn_classifier(self, data):
        # When normalizing, the classifier is wrapped so that future examples
        # are translated into the normalized domain before classification.
        if self.normalization:
            data = self._normalize(data)
            svm = self.learner(data)
            return SVMClassifierWrapper(svm)
        return self.learner(data)

    @Orange.misc.deprecated_keywords({"progressCallback": "progress_callback"})
    def tune_parameters(self, data, parameters=None, folds=5, verbose=0,
                       progress_callback=None):
        """Tune the ``parameters`` on given ``data`` using
        cross validation.

        :param data: data for parameter tuning
        :type data: Orange.data.Table
        :param parameters: defaults to ["nu", "C", "gamma"]
        :type parameters: list of strings
        :param folds: number of folds used for cross validation
        :type folds: int
        :param verbose: default False
        :type verbose: bool
        :param progress_callback: report progress
        :type progress_callback: callback function

        An example that tunes the `gamma` parameter on `data` using 3-fold cross
        validation.

            >>> svm = Orange.classification.svm.SVMLearner()
            >>> svm.tune_parameters(table, parameters=["gamma"], folds=3)

        """

        import orngWrap

        if parameters is None:
            parameters = ["nu", "C", "gamma"]

        searchParams = []
        # Normalize once up front and temporarily disable normalization so
        # each cross-validation fold does not re-normalize.
        normalization = self.normalization
        if normalization:
            data = self._normalize(data)
            self.normalization = False
        if self.svm_type in [SVMLearner.Nu_SVC, SVMLearner.Nu_SVR] \
                    and "nu" in parameters:
            if isinstance(data.domain.class_var, variable.Discrete):
                max_nu = max(self.max_nu(data) - 1e-7, 0.0)
            else:
                max_nu = 1.0
            searchParams.append(("nu", [i / 10.0 for i in range(1, 9) if
                                        i / 10.0 < max_nu] + [max_nu]))
        elif "C" in parameters:
            searchParams.append(("C", [2 ** a for a in range(-5, 15, 2)]))
        if self.kernel_type == 2 and "gamma" in parameters:
            searchParams.append(("gamma", [2 ** a for a in range(-5, 5, 2)] + [0]))
        tunedLearner = orngWrap.TuneMParameters(object=self,
                            parameters=searchParams,
                            folds=folds,
                            returnWhat=orngWrap.TuneMParameters.returnLearner,
                            progressCallback=progress_callback
                            if progress_callback else lambda i: None)
        tunedLearner(data, verbose=verbose)
        if normalization:
            self.normalization = normalization

    def _normalize(self, data):
        # Continuous features are scaled by span, multinomial features are
        # expanded into N indicator values; the class is left untouched.
        dc = Orange.core.DomainContinuizer()
        dc.class_treatment = Orange.core.DomainContinuizer.Ignore
        dc.continuous_treatment = Orange.core.DomainContinuizer.NormalizeBySpan
        dc.multinomial_treatment = Orange.core.DomainContinuizer.NValues
        newdomain = dc(data)
        return data.translate(newdomain)

SVMLearner = Orange.misc.deprecated_members({
    "learnClassifier": "learn_classifier",
    "tuneParameters": "tune_parameters",
    "kernelFunc" : "kernel_func",
    },
    wrap_methods=["__init__", "tune_parameters"])(SVMLearner)
407
class SVMClassifierWrapper(Orange.core.SVMClassifier):
    """Wrap a trained SVM classifier so that incoming examples are first
    translated into the (possibly normalized) domain the classifier was
    trained on, then delegated to the wrapped classifier.
    """

    def __new__(cls, wrapped):
        return Orange.core.SVMClassifier.__new__(cls, name=wrapped.name)

    def __init__(self, wrapped):
        self.wrapped = wrapped
        # Mirror the wrapped classifier's instance attributes on the wrapper.
        self.__dict__.update(wrapped.__dict__)

    def __call__(self, example, what=Orange.core.GetValue):
        translated = Orange.data.Instance(self.wrapped.domain, example)
        return self.wrapped(translated, what)

    def class_distribution(self, example):
        translated = Orange.data.Instance(self.wrapped.domain, example)
        return self.wrapped.class_distribution(translated)

    def get_decision_values(self, example):
        translated = Orange.data.Instance(self.wrapped.domain, example)
        return self.wrapped.get_decision_values(translated)

    def get_model(self):
        return self.wrapped.get_model()

    def __reduce__(self):
        # Pickle only the state not already carried by the wrapped classifier.
        own_state = dict((name, val) for name, val in self.__dict__.items()
                         if name not in self.wrapped.__dict__)
        return SVMClassifierWrapper, (self.wrapped,), own_state

SVMClassifierWrapper = Orange.misc.deprecated_members({
    "classDistribution": "class_distribution",
    "getDecisionValues": "get_decision_values",
    "getModel" : "get_model",
    })(SVMClassifierWrapper)
442           
class SVMLearnerSparse(SVMLearner):

    """A :class:`SVMLearner` that learns from
    meta attributes.

    Meta attributes do not need to be registered with the data set domain, or
    present in all the instances. Use this for large
    sparse data sets.

    """

    @Orange.misc.deprecated_keywords({"useNonMeta": "use_non_meta"})
    def __init__(self, **kwds):
        SVMLearner.__init__(self, **kwds)
        # Whether the regular (non-meta) attributes should be normalized too.
        self.use_non_meta = kwds.get("use_non_meta", False)
        self.learner = Orange.core.SVMLearnerSparse(**kwds)

    def _normalize(self, data):
        # Sparse (meta) data is left untouched unless explicitly requested.
        if not self.use_non_meta:
            return data
        dc = Orange.core.DomainContinuizer()
        dc.class_treatment = Orange.core.DomainContinuizer.Ignore
        dc.continuous_treatment = Orange.core.DomainContinuizer.NormalizeBySpan
        dc.multinomial_treatment = Orange.core.DomainContinuizer.NValues
        return data.translate(dc(data))
469
class SVMLearnerEasy(SVMLearner):

    """Apart from the functionality of :obj:`SVMLearner` it automatically
    scales the data and performs parameter optimization with
    :func:`SVMLearner.tune_parameters`. It is similar to the easy.py script
    in the LibSVM package.

    """

    def __init__(self, **kwds):
        self.folds = 4
        self.verbose = 0
        SVMLearner.__init__(self, **kwds)
        # A separate inner learner is tuned and trained on the scaled data.
        self.learner = SVMLearner(**kwds)

    def learn_classifier(self, data):
        # Scale all continuous features to a common span before tuning.
        transformer = Orange.core.DomainContinuizer()
        transformer.multinomialTreatment = Orange.core.DomainContinuizer.NValues
        transformer.continuousTreatment = \
            Orange.core.DomainContinuizer.NormalizeBySpan
        transformer.classTreatment = Orange.core.DomainContinuizer.Ignore
        newdomain = transformer(data)
        newexamples = data.translate(newdomain)

        parameters = []
        self.learner.normalization = False  # Normalization already done

        if self.svm_type in [1, 4]:  # Nu_SVC / Nu_SVR: tune nu
            if self.svm_type == SVMLearner.Nu_SVC:
                max_nu = max(self.max_nu(newexamples) - 1e-7, 0.0)
            else:
                max_nu = 1.0
            parameters.append(("nu", [i / 10.0 for i in range(1, 9)
                                      if i / 10.0 < max_nu] + [max_nu]))
        else:
            parameters.append(("C", [2 ** a for a in range(-5, 15, 2)]))
        if self.kernel_type == 2:  # RBF kernel: tune gamma as well
            parameters.append(("gamma", [2 ** a for a in range(-5, 5, 2)] + [0]))
        import orngWrap
        tunedLearner = orngWrap.TuneMParameters(object=self.learner,
                                                parameters=parameters,
                                                folds=self.folds)

        return SVMClassifierWrapper(tunedLearner(newexamples,
                                                 verbose=self.verbose))

# BUG FIX: this previously rebound SVMLearner (already wrapped above),
# leaving SVMLearnerEasy without its deprecation aliases.
SVMLearnerEasy = Orange.misc.deprecated_members({
    "learnClassifier": "learn_classifier",
    })(SVMLearnerEasy)
521
class SVMLearnerSparseClassEasy(SVMLearnerEasy, SVMLearnerSparse):
    # Sparse counterpart of SVMLearnerEasy: automatic scaling and parameter
    # tuning come from SVMLearnerEasy (first in the MRO), while the
    # sparse-aware learner and _normalize come from SVMLearnerSparse,
    # whose __init__ is invoked explicitly here.
    def __init__(self, **kwds):
        SVMLearnerSparse.__init__(self, **kwds)
525
def default_preprocessor():
    """Return the default preprocessor (imputation followed by ordinal
    continuization) used by the Orange.core.LinearLearner based learners.
    """
    steps = [Preprocessor_impute(),
             Preprocessor_continuize(multinomialTreatment=
                                     DomainContinuizer.AsOrdinal)]
    return Preprocessor_preprocessorList(preprocessors=steps)
535
class LinearSVMLearner(Orange.core.LinearLearner):
    """Train a linear SVM model (using the LIBLINEAR solvers)."""

    L2R_L2LOSS_DUAL = Orange.core.LinearLearner.L2R_L2Loss_SVC_Dual
    L2R_L2LOSS = Orange.core.LinearLearner.L2R_L2Loss_SVC
    # BUG FIX: a duplicate assignment previously overwrote this constant
    # with the L2-loss dual value, making the L1-loss dual solver
    # unselectable by name.
    L2R_L1LOSS_DUAL = Orange.core.LinearLearner.L2R_L1Loss_SVC_Dual
    L1R_L2LOSS = Orange.core.LinearLearner.L1R_L2Loss_SVC

    __new__ = _orange__new__(base=Orange.core.LinearLearner)

    def __init__(self, solver_type=L2R_L2LOSS_DUAL, C=1.0, eps=0.01, **kwargs):
        """
        :param solver_type: Can be one of class constants:

            - L2R_L2LOSS_DUAL
            - L2R_L2LOSS
            - L2R_L1LOSS_DUAL
            - L1R_L2LOSS

        :param C: Regularization parameter (default 1.0)
        :type C: float

        :param eps: Stopping criteria (default 0.01)
        :type eps: float

        """
        self.solver_type = solver_type
        self.eps = eps
        self.C = C
        for name, val in kwargs.items():
            setattr(self, name, val)
        if self.solver_type not in [self.L2R_L2LOSS_DUAL, self.L2R_L2LOSS,
                self.L2R_L1LOSS_DUAL, self.L1R_L2LOSS]:
            # Unknown solver types are tolerated for backward compatibility.
            pass
#            raise ValueError("Invalid solver_type parameter.")

        self.preproc = default_preprocessor()

    def __call__(self, instances, weight_id=None):
        # Impute and continuize before handing off to the C++ learner.
        instances = self.preproc(instances)
        classifier = super(LinearSVMLearner, self).__call__(instances, weight_id)
        return classifier

LinearLearner = LinearSVMLearner
582
class MultiClassSVMLearner(Orange.core.LinearLearner):
    """ Multi-class SVM (Crammer and Singer) from the `LIBLINEAR`_ library.
    """
    __new__ = _orange__new__(base=Orange.core.LinearLearner)

    def __init__(self, C=1.0, eps=0.01, **kwargs):
        """\
        :param C: Regularization parameter (default 1.0)
        :type C: float 

        :param eps: Stopping criteria (default 0.01)
        :type eps: float

        """
        self.C = C
        self.eps = eps
        for name in kwargs:
            setattr(self, name, kwargs[name])

        # Always use the Crammer-Singer multi-class formulation.
        self.solver_type = self.MCSVM_CS
        self.preproc = default_preprocessor()

    def __call__(self, instances, weight_id=None):
        # Impute and continuize before handing off to the C++ learner.
        preprocessed = self.preproc(instances)
        return super(MultiClassSVMLearner, self).__call__(preprocessed,
                                                          weight_id)
609
610#TODO: Unified way to get attr weights for linear SVMs.
611
def get_linear_svm_weights(classifier, sum=True):
    """Extract attribute weights from the linear SVM classifier.

    For multi class classification the weights are square-summed over all
    binary one vs. one classifiers unless :obj:`sum` is False, in which case
    the return value is a list of weights for each individual binary
    classifier (in the order of [class1 vs class2, class1 vs class3 ... class2
    vs class3 ...]).

    """

    def update_weights(w, key, val, mul):
        if key in w:
            w[key] += mul * val
        else:
            w[key] = mul * val

    def to_float(val):
        # Missing values contribute nothing to the weight.
        return float(val) if not val.isSpecial() else 0.0

    def accumulate(w, sv_range, coef_ind):
        # Add the contribution of every support vector in sv_range to w,
        # each value weighted by its dual coefficient for this binary
        # sub-problem (extracted once, was duplicated inline before).
        for sv_ind in range(*sv_range):
            attributes = SVs.domain.attributes + \
                SVs[sv_ind].getmetas(False, Orange.data.variable.Variable).keys()
            for attr in attributes:
                if attr.varType == Orange.data.Type.Continuous:
                    update_weights(w, attr, to_float(SVs[sv_ind][attr]),
                                   classifier.coef[coef_ind][sv_ind])

    SVs = classifier.support_vectors
    weights = []

    class_var = SVs.domain.class_var
    if classifier.svm_type in [SVMLearner.C_SVC, SVMLearner.Nu_SVC]:
        classes = class_var.values
    else:
        classes = [""]
    # Support vectors are stored grouped by class; compute each group's range.
    if len(classes) > 1:
        sv_ranges = [(0, classifier.nSV[0])]
        for n in classifier.nSV[1:]:
            sv_ranges.append((sv_ranges[-1][1], sv_ranges[-1][1] + n))
    else:
        sv_ranges = [(0, len(SVs))]

    for i in range(len(classes) - 1):
        for j in range(i + 1, len(classes)):
            # One binary one-vs-one classifier per class pair (i, j).
            w = {}
            accumulate(w, sv_ranges[i], j - 1)
            accumulate(w, sv_ranges[j], i)
            weights.append(w)

    if sum:
        # Square-sum over all binary classifiers, then take the square root.
        scores = defaultdict(float)

        for w in weights:
            for attr, w_attr in w.items():
                scores[attr] += w_attr ** 2
        for key in scores:
            scores[key] = math.sqrt(scores[key])
        return scores
    else:
        return weights

getLinearSVMWeights = get_linear_svm_weights
681
def example_weighted_sum(example, weights):
    """Return the weighted sum of the example's attribute values."""
    return sum(float(example[attr]) * w for attr, w in weights.items())

exampleWeightedSum = example_weighted_sum
689
class Score_SVMWeights(Orange.feature.scoring.Score):
    """Score feature by training a linear SVM classifier, using a squared sum of
    weights (of each binary classifier) as the returned score.

    Example:

        >>> score = Score_SVMWeights()
        >>> for feature in table.domain.features:
        ...     print "%15s: %.3f" % (feature.name, score(feature, table))

    """

    def __new__(cls, attr=None, data=None, weightId=None, **kwargs):
        self = Orange.feature.scoring.Score.__new__(cls, **kwargs)
        if data is None or attr is None:
            return self
        # "Immediate scoring" form: construct, then score right away.
        self.__init__(**kwargs)
        return self.__call__(attr, data, weightId)

    def __reduce__(self):
        return Score_SVMWeights, (), dict(self.__dict__)

    def __init__(self, learner=None, **kwargs):
        """
        :param learner: Learner used for weight estimation
            (default LinearSVMLearner(solver_type=L2Loss_SVM_Dual))
        :type learner: Orange.core.LinearLearner

        """
        if learner:
            self.learner = learner
        else:
            self.learner = LinearSVMLearner(
                solver_type=LinearSVMLearner.L2R_L2LOSS_DUAL)

        # Cache the last-scored table so repeated calls train only once.
        self._cached_examples = None

    def __call__(self, attr, data, weightId=None):
        if data is not self._cached_examples:
            # New table: train a fresh classifier and cache its weights.
            classifier = self.learner(data, weightId)
            self._cached_examples = data
            import numpy
            raw = numpy.array(classifier.weights)
            squared = numpy.sum(raw ** 2, axis=0)
            self._cached_weights = dict(zip(data.domain.attributes, squared))
        return self._cached_weights.get(attr, 0.0)

MeasureAttribute_SVMWeights = Score_SVMWeights
742
class RFE(object):

    """Recursive feature elimination using linear SVM derived attribute
    weights.

    Example:

        >>> rfe = RFE(SVMLearner(kernel_type=kernels.Linear, \
normalization=False)) # normalization=False -> do not change the domain
        >>> data_with_removed_features = rfe(table, 5) # table with 5 best attributes

    """

    def __init__(self, learner=None):
        # Default to a linear-kernel SVM; normalization is disabled so the
        # data domain is left unchanged between elimination rounds.
        self.learner = learner or SVMLearner(kernel_type=kernels.Linear,
                                             normalization=False)

    @Orange.misc.deprecated_keywords({"progressCallback": "progress_callback", "stopAt": "stop_at" })
    def get_attr_scores(self, data, stop_at=0, progress_callback=None):
        """Return a dictionary mapping attributes to scores.
        A score is a step number at which the attribute
        was removed from the recursive evaluation.

        :param data: data to score
        :type data: Orange.data.Table
        :param stop_at: stop eliminating once this many attributes remain
        :type stop_at: int
        :param progress_callback: called with a progress percentage

        """
        iteration = 1  # renamed from `iter` to avoid shadowing the builtin
        attrs = data.domain.attributes
        attrScores = {}

        while len(attrs) > stop_at:
            weights = get_linear_svm_weights(self.learner(data), sum=False)
            if progress_callback:
                progress_callback(100. * iteration / (len(attrs) - stop_at))
            # Square-sum per-binary-classifier weights for each attribute.
            score = dict.fromkeys(attrs, 0)
            for w in weights:
                for attr, wAttr in w.items():
                    score[attr] += wAttr ** 2
            score = score.items()
            # key= sort replaces the deprecated cmp-based sort; same ordering.
            score.sort(key=lambda item: item[1])
            # Remove (roughly) a 1/(iteration+1) fraction of the worst
            # attributes, always at least one.
            numToRemove = max(int(len(attrs) * 1.0 / (iteration + 1)), 1)
            for attr, s in score[:numToRemove]:
                attrScores[attr] = len(attrScores)
            attrs = [attr for attr, s in score[numToRemove:]]
            if attrs:
                data = data.select(attrs + [data.domain.classVar])
            iteration += 1
        return attrScores

    @Orange.misc.deprecated_keywords({"numSelected": "num_selected", "progressCallback": "progress_callback"})
    def __call__(self, data, num_selected=20, progress_callback=None):
        """Return a new dataset with only `num_selected` best scoring attributes

        :param data: Data
        :type data: Orange.data.Table
        :param num_selected: number of features to preserve
        :type num_selected: int

        """
        scores = self.get_attr_scores(data, progress_callback=progress_callback)
        scores = sorted(scores.items(), key=lambda item: item[1])

        # Highest scores were removed last, i.e. they are the best attributes.
        scores = dict(scores[-num_selected:])
        attrs = [attr for attr in data.domain.attributes if attr in scores]
        domain = Orange.data.Domain(attrs, data.domain.classVar)
        domain.addmetas(data.domain.getmetas())
        data = Orange.data.Table(domain, data)
        return data

RFE = Orange.misc.deprecated_members({
    "getAttrScores": "get_attr_scores"},
    wrap_methods=["get_attr_scores", "__call__"])(RFE)
813
def example_table_to_svm_format(table, file):
    # Deprecated alias kept for backward compatibility: emits a
    # DeprecationWarning and delegates to table_to_svm_format.
    warnings.warn("Deprecated. Use table_to_svm_format", DeprecationWarning)
    table_to_svm_format(table, file)

exampleTableToSVMFormat = example_table_to_svm_format
819
def table_to_svm_format(data, file):
    """Save :obj:`Orange.data.Table` to a format used by LibSVM.

    :param data: Data
    :type data: Orange.data.Table
    :param file: file pointer
    :type file: file

    """

    # Regular attributes first, then metas; keep only continuous/discrete.
    attrs = data.domain.attributes + data.domain.getmetas().values()
    attrs = [attr for attr in attrs
             if attr.varType in [Orange.data.Type.Continuous,
                                 Orange.data.Type.Discrete]]
    cv = data.domain.classVar

    for ex in data:
        # Class label first: integer index for discrete, plain float otherwise.
        if cv.varType == Orange.data.Type.Discrete:
            fields = [str(int(ex[cv]))]
        else:
            fields = [str(float(ex[cv]))]
        # Then sparse "index:value" pairs (1-based), skipping missing values.
        for i, attr in enumerate(attrs):
            if not ex[attr].isSpecial():
                fields.append(" " + str(i + 1) + ":" + str(float(ex[attr])))
        file.write("".join(fields) + "\n")

tableToSVMFormat = table_to_svm_format
Note: See TracBrowser for help on using the repository browser.