source: orange/Orange/classification/svm/__init__.py @ 9919:8a2a770ef3af

Revision 9919:8a2a770ef3af, 30.5 KB checked in by markotoplak, 2 years ago (diff)

data.variable -> feature

Line 
1"""
2.. index:: support vector machines (SVM)
3.. index:
4   single: classification; support vector machines (SVM)
5   
6*********************************
7Support Vector Machines (``svm``)
8*********************************
9
10This is a module for `Support Vector Machine`_ (SVM) classification. It
11exposes the underlying `LibSVM`_ and `LIBLINEAR`_ library in a standard
12Orange Learner/Classifier interface.
13
14Choosing the right learner
15==========================
16
17Choose an SVM learner suitable for the problem.
18:obj:`SVMLearner` is a general SVM learner. :obj:`SVMLearnerEasy` will
19help with the data normalization and parameter tuning. Learn with a fast
20:obj:`LinearSVMLearner` on data sets with a large number of features.
21
22.. note:: SVM can perform poorly on some data sets. Choose the parameters
23          carefully. In cases of low classification accuracy, try scaling the
24          data and experiment with different parameters. \
25          :obj:`SVMLearnerEasy` class does this automatically (it is similar
26          to the `svm-easy.py` script in the LibSVM distribution).
27
28         
29SVM learners (from `LibSVM`_)
30=============================
31
32The most basic :class:`SVMLearner` implements the standard `LibSVM`_ learner
33It supports four built-in kernel types (Linear, Polynomial, RBF and Sigmoid).
34Additionally kernel functions defined in Python can be used instead.
35
36.. note:: For learning from ordinary :class:`Orange.data.Table` use the \
37    :class:`SVMLearner`. For learning from a sparse data set (i.e.
38    data in `basket` format) use the :class:`SVMLearnerSparse` class.
39
40.. autoclass:: Orange.classification.svm.SVMLearner
41    :members:
42
43.. autoclass:: Orange.classification.svm.SVMLearnerSparse
44    :members:
45    :show-inheritance:
46   
47.. autoclass:: Orange.classification.svm.SVMLearnerEasy
48    :members:
49    :show-inheritance:
50
51The next example shows how to use SVM learners and that :obj:`SVMLearnerEasy`
52with automatic data preprocessing and parameter tuning
53outperforms :obj:`SVMLearner` with the default :obj:`~SVMLearner.nu` and :obj:`~SVMLearner.gamma`: 
54   
55.. literalinclude:: code/svm-easy.py
56
57
58   
59Linear SVM learners (from `LIBLINEAR`_)
60=======================================
61
62The :class:`LinearSVMLearner` learner is more suitable for large scale
63problems as it is significantly faster than :class:`SVMLearner` and its
64subclasses. A downside is that it only supports a linear kernel (as the name
65suggests) and does not support probability estimation for the
66classifications. Furthermore a Multi-class SVM learner
67:class:`MultiClassSVMLearner` is provided.
68   
69.. autoclass:: Orange.classification.svm.LinearSVMLearner
70   :members:
71   
72.. autoclass:: Orange.classification.svm.MultiClassSVMLearner
73   :members:
74   
75   
76SVM Based feature selection and scoring
77=======================================
78
79.. autoclass:: Orange.classification.svm.RFE
80
81.. autoclass:: Orange.classification.svm.Score_SVMWeights
82    :show-inheritance:
83 
84 
85Utility functions
86=================
87
88.. automethod:: Orange.classification.svm.max_nu
89
90.. automethod:: Orange.classification.svm.get_linear_svm_weights
91
92.. automethod:: Orange.classification.svm.table_to_svm_format
93
94The following example shows how to get linear SVM weights:
95   
96.. literalinclude:: code/svm-linear-weights.py   
97
98
99.. _kernel-wrapper:
100
101Kernel wrappers
102===============
103
104Kernel wrappers are helper classes used to build custom kernels for use
105with :class:`SVMLearner` and subclasses. All wrapper constructors take
106one or more Python functions (`wrapped` attribute) to wrap. The
107function must be a positive definite kernel, taking two arguments of
108type :class:`Orange.data.Instance` and return a float.
109
110.. autoclass:: Orange.classification.svm.kernels.KernelWrapper
111   :members:
112
113.. autoclass:: Orange.classification.svm.kernels.DualKernelWrapper
114   :members:
115
116.. autoclass:: Orange.classification.svm.kernels.RBFKernelWrapper
117   :members:
118
119.. autoclass:: Orange.classification.svm.kernels.PolyKernelWrapper
120   :members:
121
122.. autoclass:: Orange.classification.svm.kernels.AdditionKernelWrapper
123   :members:
124
125.. autoclass:: Orange.classification.svm.kernels.MultiplicationKernelWrapper
126   :members:
127
128.. autoclass:: Orange.classification.svm.kernels.CompositeKernelWrapper
129   :members:
130
131.. autoclass:: Orange.classification.svm.kernels.SparseLinKernel
132   :members:
133
134Example:
135
136.. literalinclude:: code/svm-custom-kernel.py
137
138.. _`Support Vector Machine`: http://en.wikipedia.org/wiki/Support_vector_machine
139.. _`LibSVM`: http://www.csie.ntu.edu.tw/~cjlin/libsvm/
140.. _`LIBLINEAR`: http://www.csie.ntu.edu.tw/~cjlin/liblinear/
141
142"""
143
144import math
145
146from collections import defaultdict
147
148import Orange.core
149import Orange.data
150import Orange.misc
151import Orange.feature
152
153import kernels
154import warnings
155
156from Orange.core import SVMLearner as _SVMLearner
157from Orange.core import SVMLearnerSparse as _SVMLearnerSparse
158from Orange.core import LinearClassifier, \
159                        LinearLearner, \
160                        SVMClassifier, \
161                        SVMClassifierSparse
162                       
163from Orange.preprocess import Preprocessor_impute, \
164                              Preprocessor_continuize, \
165                              Preprocessor_preprocessorList, \
166                              DomainContinuizer
167
168from Orange.data import variable
169
170from Orange.misc import _orange__new__
171
def max_nu(data):
    """Return the maximum nu parameter for Nu_SVC support vector learning
    for the given data table.

    :param data: Data with discrete class variable
    :type data: Orange.data.Table

    """
    # Class counts for every value of the class variable.
    counts = list(Orange.core.Distribution(data.domain.classVar, data))
    # Start from the theoretical upper bound (1.0) and lower it for
    # every pair of non-empty classes.
    bound = 1.0
    for i, first in enumerate(counts):
        for second in counts[i + 1:]:
            if first != 0 and second != 0:
                pair_bound = 2.0 * min(first, second) / (first + second)
                if pair_bound < bound:
                    bound = pair_bound
    return bound

maxNu = max_nu
190   
class SVMLearner(_SVMLearner):
    """
    :param svm_type: defines the SVM type (can be C_SVC, Nu_SVC
        (default), OneClass, Epsilon_SVR, Nu_SVR)
    :type svm_type: SVMLearner.SVMType
    :param kernel_type: defines the kernel type for learning
        (can be kernels.RBF (default), kernels.Linear, kernels.Polynomial,
        kernels.Sigmoid, kernels.Custom)
    :type kernel_type: SVMLearner.Kernel
    :param degree: kernel parameter (for Polynomial) (default 3)
    :type degree: int
    :param gamma: kernel parameter (Polynomial/RBF/Sigmoid)
        (default 1.0/num_of_features)
    :type gamma: float
    :param coef0: kernel parameter (Polynomial/Sigmoid) (default 0)
    :type coef0: int
    :param kernel_func: function that will be called if `kernel_type` is
        `kernels.Custom`. It must accept two :obj:`Orange.data.Instance`
        arguments and return a float (see :ref:`kernel-wrapper` for some
        examples).
    :type kernel_func: callable function
    :param C: C parameter for C_SVC, Epsilon_SVR and Nu_SVR
    :type C: float
    :param nu: Nu parameter for Nu_SVC, Nu_SVR and OneClass (default 0.5)
    :type nu: float
    :param p: epsilon in loss-function for Epsilon_SVR
    :type p: float
    :param cache_size: cache memory size in MB (default 200)
    :type cache_size: int
    :param eps: tolerance of termination criterion (default 0.001)
    :type eps: float
    :param probability: build a probability model
        (default False)
    :type probability: bool
    :param shrinking: use shrinking heuristics
        (default True)
    :type shrinking: bool
    :param weight: a list of class weights
    :type weight: list
    
    Example:
    
        >>> import Orange
        >>> from Orange.classification import svm
        >>> from Orange.evaluation import testing, scoring
        >>> table = Orange.data.Table("vehicle.tab")
        >>> learner = svm.SVMLearner()
        >>> results = testing.cross_validation([learner], table, folds=5)
        >>> print scoring.CA(results)
    
    """
    __new__ = _orange__new__(_SVMLearner)
    
    # SVM formulation constants re-exported from the wrapped Orange.core
    # learner (libsvm's svm_type enumeration).
    C_SVC = _SVMLearner.C_SVC
    Nu_SVC = _SVMLearner.Nu_SVC
    OneClass = _SVMLearner.OneClass
    Nu_SVR = _SVMLearner.Nu_SVR
    Epsilon_SVR = _SVMLearner.Epsilon_SVR
    
    @Orange.misc.deprecated_keywords({"kernelFunc": "kernel_func"})
    def __init__(self, svm_type=Nu_SVC, kernel_type=kernels.RBF, 
                 kernel_func=None, C=1.0, nu=0.5, p=0.1, gamma=0.0, degree=3, 
                 coef0=0, shrinking=True, probability=True, verbose=False, 
                 cache_size=200, eps=0.001, normalization=True,
                 weight=None, **kwargs):
        self.svm_type = svm_type
        self.kernel_type = kernel_type
        self.kernel_func = kernel_func
        self.C = C
        self.nu = nu
        self.p = p
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.shrinking = shrinking
        self.probability = probability
        self.verbose = verbose
        self.cache_size = cache_size
        self.eps = eps
        self.normalization = normalization
        for key, val in kwargs.items():
            setattr(self, key, val)
        self.learner = Orange.core.SVMLearner(**kwargs)
        # ``weight=None`` (instead of the former mutable ``[]`` default)
        # avoids one list being shared between all learner instances.
        self.weight = weight if weight is not None else []

    max_nu = staticmethod(max_nu)

    def __call__(self, data, weight=0):
        """Construct a SVM classifier
        
        :param table: data with continuous features
        :type table: Orange.data.Table
        
        :param weight: unused - use the constructors ``weight``
            parameter to set class weights
        
        """
        
        examples = Orange.core.Preprocessor_dropMissingClasses(data)
        class_var = examples.domain.class_var
        if len(examples) == 0:
            raise ValueError("Example table is without any defined classes")
        
        # Fix the svm_type parameter if we have a class_var/svm_type
        # mismatch. Classification types (C_SVC=0, Nu_SVC=1) are shifted
        # by 3 to their regression counterparts (Epsilon_SVR=3, Nu_SVR=4)
        # and vice versa.
        if self.svm_type in [self.C_SVC, self.Nu_SVC] and \
            isinstance(class_var, Orange.feature.Continuous):
            self.svm_type += 3
            #raise AttributeError, "Cannot learn a discrete classifier from non discrete class data. Use EPSILON_SVR or NU_SVR for regression"
        if self.svm_type in [self.Epsilon_SVR, self.Nu_SVR] and \
            isinstance(class_var, Orange.feature.Discrete):
            self.svm_type -= 3
            #raise AttributeError, "Cannot do regression on discrete class data. Use C_SVC or NU_SVC for classification"
        if self.kernel_type == kernels.Custom and not self.kernel_func:
            raise ValueError("Custom kernel function not supplied")
        
        # Clamp nu to its feasible range for Nu_SVC (see max_nu).
        nu = self.nu
        if self.svm_type == SVMLearner.Nu_SVC:
            max_nu = self.max_nu(examples)
            if self.nu > max_nu:
                if getattr(self, "verbose", 0):
                    warnings.warn("Specified nu %.3f is infeasible. \
                    Setting nu to %.3f" % (self.nu, max_nu))
                nu = max(max_nu - 1e-7, 0.0)
            
        # Push the (possibly adjusted) parameters down to the wrapped
        # Orange.core learner.
        for name in ["svm_type", "kernel_type", "kernel_func", "C", "nu", "p", 
                     "gamma", "degree", "coef0", "shrinking", "probability", 
                     "verbose", "cache_size", "eps"]:
            setattr(self.learner, name, getattr(self, name))
        self.learner.nu = nu
        self.learner.set_weights(self.weight)
        if self.svm_type == SVMLearner.OneClass and self.probability:
            self.learner.probability = False
            warnings.warn("One-class SVM probability output not supported yet.")
        return self.learn_classifier(examples)

    def learn_classifier(self, data):
        # When normalization is on, the classifier is wrapped so that
        # future instances get translated into the normalized domain.
        if self.normalization:
            data = self._normalize(data)
            svm = self.learner(data)
            return SVMClassifierWrapper(svm)
        return self.learner(data)

    @Orange.misc.deprecated_keywords({"progressCallback": "progress_callback"})
    def tune_parameters(self, data, parameters=None, folds=5, verbose=0, 
                       progress_callback=None):
        """Tune the ``parameters`` on given ``data`` using
        cross validation.
        
        :param data: data for parameter tuning
        :type data: Orange.data.Table
        :param parameters: defaults to ["nu", "C", "gamma"]
        :type parameters: list of strings
        :param folds: number of folds used for cross validation
        :type folds: int
        :param verbose: default False
        :type verbose: bool
        :param progress_callback: report progress
        :type progress_callback: callback function
            
        An example that tunes the `gamma` parameter on `data` using 3-fold cross
        validation.
        
            >>> svm = Orange.classification.svm.SVMLearner()
            >>> svm.tune_parameters(table, parameters=["gamma"], folds=3)
                    
        """
        
        import orngWrap
        
        if parameters is None:
            parameters = ["nu", "C", "gamma"]
            
        searchParams = []
        # Normalize once up front and disable normalization during the
        # search so it is not repeated for every candidate/fold.
        normalization = self.normalization
        if normalization:
            data = self._normalize(data)
            self.normalization = False
        if self.svm_type in [SVMLearner.Nu_SVC, SVMLearner.Nu_SVR] \
                    and "nu" in parameters:
            if isinstance(data.domain.class_var, variable.Discrete):
                max_nu = max(self.max_nu(data) - 1e-7, 0.0)
            else:
                max_nu = 1.0
            searchParams.append(("nu", [i/10.0 for i in range(1, 9) if \
                                        i/10.0 < max_nu] + [max_nu]))
        elif "C" in parameters:
            searchParams.append(("C", [2**a for a in  range(-5,15,2)]))
        # gamma is only meaningful for the RBF kernel.
        if self.kernel_type == kernels.RBF and "gamma" in parameters:
            searchParams.append(("gamma", [2**a for a in range(-5,5,2)]+[0]))
        tunedLearner = orngWrap.TuneMParameters(object=self,
                            parameters=searchParams, 
                            folds=folds, 
                            returnWhat=orngWrap.TuneMParameters.returnLearner, 
                            progressCallback=progress_callback
                            if progress_callback else lambda i:None)
        tunedLearner(data, verbose=verbose)
        if normalization:
            self.normalization = normalization

    def _normalize(self, data):
        # Continuize the domain: span-normalize continuous features and
        # expand multinomial features into indicator values.
        dc = Orange.core.DomainContinuizer()
        dc.class_treatment = Orange.core.DomainContinuizer.Ignore
        dc.continuous_treatment = Orange.core.DomainContinuizer.NormalizeBySpan
        dc.multinomial_treatment = Orange.core.DomainContinuizer.NValues
        newdomain = dc(data)
        return data.translate(newdomain)

SVMLearner = Orange.misc.deprecated_members({
    "learnClassifier": "learn_classifier", 
    "tuneParameters": "tune_parameters",
    "kernelFunc" : "kernel_func",
    },
    wrap_methods=["__init__", "tune_parameters"])(SVMLearner)
407
class SVMClassifierWrapper(Orange.core.SVMClassifier):
    """Wrap a trained SVM classifier so that every incoming instance is
    first translated into the (possibly normalized) domain the
    classifier was trained on.
    """

    def __new__(cls, wrapped):
        return Orange.core.SVMClassifier.__new__(cls, name=wrapped.name)

    def __init__(self, wrapped):
        self.wrapped = wrapped
        # Mirror the wrapped classifier's attributes on the wrapper.
        self.__dict__.update(wrapped.__dict__)

    def __call__(self, example, what=Orange.core.GetValue):
        translated = Orange.data.Instance(self.wrapped.domain, example)
        return self.wrapped(translated, what)

    def class_distribution(self, example):
        translated = Orange.data.Instance(self.wrapped.domain, example)
        return self.wrapped.class_distribution(translated)

    def get_decision_values(self, example):
        translated = Orange.data.Instance(self.wrapped.domain, example)
        return self.wrapped.get_decision_values(translated)

    def get_model(self):
        return self.wrapped.get_model()

    def __reduce__(self):
        # Pickle only the state the wrapper itself added.
        own_state = dict([(name, val)
                          for name, val in self.__dict__.items()
                          if name not in self.wrapped.__dict__])
        return SVMClassifierWrapper, (self.wrapped,), own_state

SVMClassifierWrapper = Orange.misc.deprecated_members({
    "classDistribution": "class_distribution", 
    "getDecisionValues": "get_decision_values",
    "getModel" : "get_model",
    })(SVMClassifierWrapper)
442           
class SVMLearnerSparse(SVMLearner):
    
    """A :class:`SVMLearner` that learns from
    meta attributes.
    
    Meta attributes do not need to be registered with the data set domain, or
    present in all the instances. Use this for large
    sparse data sets.
    
    """
    
    @Orange.misc.deprecated_keywords({"useNonMeta": "use_non_meta"})
    def __init__(self, **kwds):
        SVMLearner.__init__(self, **kwds)
        self.use_non_meta = kwds.get("use_non_meta", False)
        self.learner = Orange.core.SVMLearnerSparse(**kwds)

    def _normalize(self, data):
        # Only the regular (non-meta) part of the domain can be
        # continuized; the sparse meta values are left untouched.
        if not self.use_non_meta:
            return data
        continuizer = Orange.core.DomainContinuizer()
        continuizer.class_treatment = Orange.core.DomainContinuizer.Ignore
        continuizer.continuous_treatment = \
            Orange.core.DomainContinuizer.NormalizeBySpan
        continuizer.multinomial_treatment = Orange.core.DomainContinuizer.NValues
        return data.translate(continuizer(data))
469
class SVMLearnerEasy(SVMLearner):
    
    """Apart from the functionality of :obj:`SVMLearner` it automatically
    scales the data and performs parameter optimization with
    :func:`SVMLearner.tune_parameters`. It is similar to the easy.py script
    in the LibSVM package.
    
    """
    
    def __init__(self, **kwds):
        # Defaults for the internal parameter search.
        self.folds = 4
        self.verbose = 0
        SVMLearner.__init__(self, **kwds)
        self.learner = SVMLearner(**kwds)

    def learn_classifier(self, data):
        # Scale continuous features by span and expand multinomial ones
        # into indicator values before tuning (the preprocessing that
        # LibSVM's easy.py applies).
        transformer = Orange.core.DomainContinuizer()
        transformer.multinomialTreatment = Orange.core.DomainContinuizer.NValues
        transformer.continuousTreatment = \
            Orange.core.DomainContinuizer.NormalizeBySpan
        transformer.classTreatment = Orange.core.DomainContinuizer.Ignore
        newdomain = transformer(data)
        newexamples = data.translate(newdomain)

        parameters = []
        self.learner.normalization = False  # Normalization already done.

        # Tune nu for the Nu-formulations (respecting its feasible upper
        # bound for classification), otherwise tune C.
        if self.svm_type in [SVMLearner.Nu_SVC, SVMLearner.Nu_SVR]:
            if self.svm_type == SVMLearner.Nu_SVC:
                max_nu = max(self.max_nu(newexamples) - 1e-7, 0.0)
            else:
                max_nu = 1.0
            parameters.append(("nu", [i / 10.0 for i in range(1, 9)
                                      if i / 10.0 < max_nu] + [max_nu]))
        else:
            parameters.append(("C", [2 ** a for a in range(-5, 15, 2)]))
        # gamma is only meaningful for the RBF kernel.
        if self.kernel_type == kernels.RBF:
            parameters.append(("gamma", [2 ** a for a in range(-5, 5, 2)] + [0]))
        import orngWrap
        tunedLearner = orngWrap.TuneMParameters(object=self.learner,
                                                parameters=parameters,
                                                folds=self.folds)

        return SVMClassifierWrapper(tunedLearner(newexamples,
                                                 verbose=self.verbose))
517
class SVMLearnerSparseClassEasy(SVMLearnerEasy, SVMLearnerSparse):
    """:class:`SVMLearnerEasy` for sparse (meta attribute) data sets."""
    def __init__(self, **kwds):
        # Set the tuning defaults that SVMLearnerEasy.learn_classifier
        # relies on; delegating only to SVMLearnerSparse.__init__ (which
        # skips SVMLearnerEasy.__init__) would leave ``folds`` unset and
        # learning would fail with an AttributeError.
        self.folds = 4
        self.verbose = 0
        SVMLearnerSparse.__init__(self, **kwds)
521
def default_preprocessor():
    """Return the default preprocessor (imputation followed by
    continuization) used by the Orange.core.LinearLearner based learners.
    """
    steps = [Preprocessor_impute(),
             Preprocessor_continuize(multinomialTreatment=
                                     DomainContinuizer.AsOrdinal)]
    return Preprocessor_preprocessorList(preprocessors=steps)
531
class LinearSVMLearner(Orange.core.LinearLearner):
    """Train a linear SVM model."""
    
    L2R_L2LOSS_DUAL = Orange.core.LinearLearner.L2R_L2Loss_SVC_Dual
    L2R_L2LOSS = Orange.core.LinearLearner.L2R_L2Loss_SVC
    # NOTE: a duplicate assignment used to clobber this constant with the
    # L2-loss dual solver; it now correctly names the L1-loss dual solver.
    L2R_L1LOSS_DUAL = Orange.core.LinearLearner.L2R_L1Loss_SVC_Dual
    L1R_L2LOSS = Orange.core.LinearLearner.L1R_L2Loss_SVC
    
    __new__ = _orange__new__(base=Orange.core.LinearLearner)
        
    def __init__(self, solver_type=L2R_L2LOSS_DUAL, C=1.0, eps=0.01, **kwargs):
        """
        :param solver_type: Can be one of class constants:
        
            - L2R_L2LOSS_DUAL
            - L2R_L2LOSS
            - L2R_L1LOSS_DUAL
            - L1R_L2LOSS
        
        :param C: Regularization parameter (default 1.0)
        :type C: float 
        
        :param eps: Stopping criteria (default 0.01)
        :type eps: float
         
        """
        self.solver_type = solver_type
        self.eps = eps
        self.C = C
        for name, val in kwargs.items():
            setattr(self, name, val)
        if self.solver_type not in [self.L2R_L2LOSS_DUAL, self.L2R_L2LOSS,
                self.L2R_L1LOSS_DUAL, self.L1R_L2LOSS]:
            # Unknown solver types are tolerated for backward
            # compatibility; Orange.core reports its own error later.
            pass
#            raise ValueError("Invalid solver_type parameter.")
        
        # Impute missing values and continuize before fitting.
        self.preproc = default_preprocessor()
            
    def __call__(self, instances, weight_id=None):
        """Preprocess `instances` and fit the linear model."""
        instances = self.preproc(instances)
        classifier = super(LinearSVMLearner, self).__call__(instances, weight_id)
        return classifier
        
LinearLearner = LinearSVMLearner
578
class MultiClassSVMLearner(Orange.core.LinearLearner):
    """ Multi-class SVM (Crammer and Singer) from the `LIBLINEAR`_ library.
    """
    __new__ = _orange__new__(base=Orange.core.LinearLearner)
        
    def __init__(self, C=1.0, eps=0.01, **kwargs):
        """\
        :param C: Regularization parameter (default 1.0)
        :type C: float 
        
        :param eps: Stopping criteria (default 0.01)
        :type eps: float
        
        """
        self.C = C
        self.eps = eps
        for name, val in kwargs.items():
            setattr(self, name, val)

        # The Crammer-Singer multi-class formulation is the only solver
        # this learner uses.
        self.solver_type = self.MCSVM_CS
        # Impute missing values and continuize before fitting.
        self.preproc = default_preprocessor()

    def __call__(self, instances, weight_id=None):
        preprocessed = self.preproc(instances)
        return super(MultiClassSVMLearner, self).__call__(preprocessed,
                                                          weight_id)
605
606#TODO: Unified way to get attr weights for linear SVMs.
607
def get_linear_svm_weights(classifier, sum=True):
    """Extract attribute weights from the linear SVM classifier.
    
    For multi class classification the weights are square-summed over all
    binary one vs. one classifiers unless :obj:`sum` is False, in which case
    the return value is a list of weights for each individual binary
    classifier (in the order of [class1 vs class2, class1 vs class3 ... class2
    vs class3 ...]).

    :param classifier: a trained SVM classifier with a linear kernel
    :param sum: if True (default) return a single dictionary of summed
        weights, otherwise a list of per-binary-classifier weight dicts
    """
    
    def update_weights(w, key, val, mul):
        # Accumulate mul*val into w[key].
        if key in w:
            w[key]+=mul*val
        else:
            w[key]=mul*val
            
    def to_float(val):
        # Missing (special) values contribute nothing to the weight.
        return float(val) if not val.isSpecial() else 0.0 
            
    SVs=classifier.support_vectors
    weights = []
    
    class_var = SVs.domain.class_var
    if classifier.svm_type in [SVMLearner.C_SVC, SVMLearner.Nu_SVC]:
        classes = class_var.values
    else:
        classes = [""]
    if len(classes) > 1:
        # Support vectors are stored grouped by class; nSV holds the
        # per-class counts, from which the index ranges are rebuilt here.
        sv_ranges = [(0, classifier.nSV[0])]
        for n in classifier.nSV[1:]:
            sv_ranges.append((sv_ranges[-1][1], sv_ranges[-1][1]+n))
    else:
        sv_ranges = [(0, len(SVs))]
        
    # One weight dictionary per one-vs-one binary classifier (i vs j),
    # built from both classes' support vectors and their dual coefficients.
    for i in range(len(classes) - 1):
        for j in range(i+1, len(classes)):
            w = {}
            coef_ind = j - 1
            for sv_ind in range(*sv_ranges[i]):
                attributes = SVs.domain.attributes + \
                SVs[sv_ind].getmetas(False, Orange.feature.Descriptor).keys()
                for attr in attributes:
                    if attr.varType == Orange.data.Type.Continuous:
                        update_weights(w, attr, to_float(SVs[sv_ind][attr]), \
                                       classifier.coef[coef_ind][sv_ind])
            coef_ind=i
            for sv_ind in range(*sv_ranges[j]):
                attributes = SVs.domain.attributes + \
                SVs[sv_ind].getmetas(False, Orange.feature.Descriptor).keys()
                for attr in attributes:
                    if attr.varType==Orange.data.Type.Continuous:
                        update_weights(w, attr, to_float(SVs[sv_ind][attr]), \
                                       classifier.coef[coef_ind][sv_ind])
            weights.append(w)
            
    if sum:
        # Square-sum (then sqrt) each attribute's weight over all binary
        # classifiers.
        scores = defaultdict(float)
        
        for w in weights:
            for attr, w_attr in w.items():
                scores[attr] += w_attr**2
        for key in scores:
            scores[key] = math.sqrt(scores[key])
        return scores
    else:
        return weights

getLinearSVMWeights = get_linear_svm_weights
677
def example_weighted_sum(example, weights):
    """Return the weighted sum of `example`'s attribute values.

    :param example: an instance (any object indexable by the keys of
        `weights`) whose values are convertible to float
    :param weights: a dictionary mapping attributes to numeric weights,
        e.g. as returned by :func:`get_linear_svm_weights`

    """
    # Use a dedicated accumulator; the original shadowed the builtin
    # ``sum`` with a local variable.
    total = 0
    for attr, w in weights.items():
        total += float(example[attr]) * w
    return total
        
exampleWeightedSum = example_weighted_sum
685
class Score_SVMWeights(Orange.feature.scoring.Score):
    """Score feature by training a linear SVM classifier, using a squared sum of
    weights (of each binary classifier) as the returned score.
        
    Example:
    
        >>> score = Score_SVMWeights()
        >>> for feature in table.domain.features:
        ...     print "%15s: %.3f" % (feature.name, score(feature, table))
         
    """
    
    def __new__(cls, attr=None, data=None, weightId=None, **kwargs):
        # Orange Score convention: when constructed with an attribute and
        # data, immediately return the score instead of the instance.
        self = Orange.feature.scoring.Score.__new__(cls, **kwargs)
        if data is not None and attr is not None:
            self.__init__(**kwargs)
            return self.__call__(attr, data, weightId)
        else:
            return self
        
    def __reduce__(self):
        # Pickle support: recreate via the zero-argument __new__ path and
        # restore the instance state.
        return Score_SVMWeights, (), dict(self.__dict__)
    
    def __init__(self, learner=None, **kwargs):
        """
        :param learner: Learner used for weight estimation
            (default LinearSVMLearner(solver_type=L2Loss_SVM_Dual))
        :type learner: Orange.core.LinearLearner
        
        """
        if learner:
            self.learner = learner
        else:
            self.learner = LinearSVMLearner(solver_type=
                                    LinearSVMLearner.L2R_L2LOSS_DUAL)
             
        # Cache of the last scored data table (by identity) and its weight
        # dictionary, so scoring every feature of a table trains only once.
        self._cached_examples = None
        
    def __call__(self, attr, data, weightId=None):
        if data is self._cached_examples:
            weights = self._cached_weights
        else:
            classifier = self.learner(data, weightId)
            self._cached_examples = data
            import numpy
            # Square-sum the per-class weight vectors into a single
            # per-attribute score.
            weights = numpy.array(classifier.weights)
            weights = numpy.sum(weights ** 2, axis=0)
            weights = dict(zip(data.domain.attributes, weights))
            self._cached_weights = weights
        # Features absent from the trained domain score 0.
        return weights.get(attr, 0.0)

MeasureAttribute_SVMWeights = Score_SVMWeights
738
class RFE(object):
    
    """Recursive feature elimination using linear SVM derived attribute
    weights.
    
    Example:
    
        >>> rfe = RFE(SVMLearner(kernel_type=kernels.Linear, \
normalization=False)) # normalization=False -> do not change the domain
        >>> data_with_removed_features = rfe(table, 5) # table with 5 best attributes
        
    """
    
    def __init__(self, learner=None):
        # The learner must produce a linear model so that
        # get_linear_svm_weights can be applied to it.
        self.learner = learner or SVMLearner(kernel_type=
                            kernels.Linear, normalization=False)

    @Orange.misc.deprecated_keywords({"progressCallback": "progress_callback", "stopAt": "stop_at" })
    def get_attr_scores(self, data, stop_at=0, progress_callback=None):
        """Return a dictionary mapping attributes to scores.
        A score is a step number at which the attribute
        was removed from the recursive evaluation.
        
        :param data: data to score
        :type data: Orange.data.Table
        :param stop_at: stop eliminating once this many attributes remain
        :type stop_at: int
        :param progress_callback: report progress
        :type progress_callback: callback function
        """
        iteration = 1  # renamed from ``iter`` (shadowed the builtin)
        attrs = data.domain.attributes
        attr_scores = {}
        
        while len(attrs) > stop_at:
            weights = get_linear_svm_weights(self.learner(data), sum=False)
            if progress_callback:
                progress_callback(100. * iteration / (len(attrs) - stop_at))
            # Square-sum each attribute's weight over all binary classifiers.
            score = dict.fromkeys(attrs, 0)
            for w in weights:
                for attr, w_attr in w.items():
                    score[attr] += w_attr ** 2
            score = score.items()
            # Ascending by squared weight: weakest attributes come first.
            score.sort(key=lambda item: item[1])
            num_to_remove = max(int(len(attrs) * 1.0 / (iteration + 1)), 1)
            for attr, s in score[:num_to_remove]:
                attr_scores[attr] = len(attr_scores)
            attrs = [attr for attr, s in score[num_to_remove:]]
            if attrs:
                data = data.select(attrs + [data.domain.classVar])
            iteration += 1
        return attr_scores
    
    @Orange.misc.deprecated_keywords({"numSelected": "num_selected", "progressCallback": "progress_callback"})
    def __call__(self, data, num_selected=20, progress_callback=None):
        """Return a new dataset with only `num_selected` best scoring attributes
        
        :param data: Data
        :type data: Orange.data.Table
        :param num_selected: number of features to preserve
        :type num_selected: int
        
        """
        scores = self.get_attr_scores(data, progress_callback=progress_callback)
        scores = sorted(scores.items(), key=lambda item: item[1])
        
        # Higher score == eliminated later == more important; keep the tail.
        scores = dict(scores[-num_selected:])
        attrs = [attr for attr in data.domain.attributes if attr in scores]
        domain = Orange.data.Domain(attrs, data.domain.classVar)
        domain.addmetas(data.domain.getmetas())
        data = Orange.data.Table(domain, data)
        return data

RFE = Orange.misc.deprecated_members({
    "getAttrScores": "get_attr_scores"},
    wrap_methods=["get_attr_scores", "__call__"])(RFE)
809
def example_table_to_svm_format(table, file):
    """Deprecated; use :func:`table_to_svm_format` instead."""
    warnings.warn("Deprecated. Use table_to_svm_format", DeprecationWarning)
    table_to_svm_format(table, file)

exampleTableToSVMFormat = example_table_to_svm_format
815
def table_to_svm_format(data, file):
    """Save :obj:`Orange.data.Table` to a format used by LibSVM.
    
    :param data: Data
    :type data: Orange.data.Table
    :param file: file pointer
    :type file: file
    
    """
    # Continuous and discrete features (regular and meta) are written as
    # 1-based "index:value" pairs; other feature types are skipped.
    usable_types = [Orange.data.Type.Continuous,
                    Orange.data.Type.Discrete]
    candidates = data.domain.attributes + data.domain.getmetas().values()
    attrs = [attr for attr in candidates if attr.varType in usable_types]
    cv = data.domain.classVar
    class_is_discrete = cv.varType == Orange.data.Type.Discrete

    for ex in data:
        # Each line starts with the class value: an integer label for
        # classification, a float target for regression.
        if class_is_discrete:
            file.write(str(int(ex[cv])))
        else:
            file.write(str(float(ex[cv])))

        for i, attr in enumerate(attrs):
            value = ex[attr]
            # Missing (special) values are simply omitted.
            if not value.isSpecial():
                file.write(" " + str(i + 1) + ":" + str(float(value)))
        file.write("\n")

tableToSVMFormat = table_to_svm_format
Note: See TracBrowser for help on using the repository browser.