Changeset 9011:cc169bc8d64a in orange


Ignore:
Timestamp:
09/24/11 10:40:34
Author:
miha <miha.stajdohar@…>
Branch:
default
Convert:
1117da7ba84ca2090d5bafb25243b9b35ce74d67
Message:

Refactored documentation.

File:
1 edited

  • orange/Orange/classification/svm/__init__.py

    r8990 r9011  
    11""" 
    2 .. index:: classification, support vector machines (SVM) 
    3  
     2.. index:: support vector machines (SVM) 
     3.. index: 
     4   single: classification; support vector machines (SVM) 
     5    
    46********************************* 
    5 Support vector machines (``svm``) 
     7Support Vector Machines (``svm``) 
    68********************************* 
    79 
    8 A collection of classes that wrap the `LibSVM library 
     10This module includes classes that wrap the `LibSVM library 
    911<http://www.csie.ntu.edu.tw/~cjlin/libsvm/>`_, a library for `support vector 
    10 machines <http://en.wikipedia.org/wiki/Support_vector_machine>`_ (SVM). In this 
    11 way SVM learners from LibSVM behave like ordinary Orange learners and can be 
     12machines <http://en.wikipedia.org/wiki/Support_vector_machine>`_ (SVM). SVM  
     13learners from LibSVM behave like ordinary Orange learners and can be 
    1214used as Python objects in training, classification and evaluation tasks. The 
    13 implementation supports the implementation of Python-based kernels, that can be 
    14 plugged-in into LibSVM implementations. 
    15  
    16 .. note:: On some data-sets SVM can perform very poorly. SVM can be very 
    17           sensitive to the proper choice of the parameters. If you are having 
    18           problems with the learner's accuracy try scaling the data and using 
    19           different parameters or choose an easier approach and use the \ 
    20           :obj:`SVMLearnerEasy` class which does this automatically (it is 
    21           similar to the `svm-easy.py`_ script in the LibSVM distribution). 
     15implementation supports Python-based kernels that can be plugged into 
     16LibSVM. 
     17 
     18.. note:: SVM can perform poorly on some data sets. Choose the parameters 
     19          carefully. In case of low classification accuracy, try scaling the 
     20          data and using different parameters. The :obj:`SVMLearnerEasy` class 
     21          does this automatically (similar to the `svm-easy.py`_ script in the 
     22          LibSVM distribution). 
    2223           
    2324SVM learners 
    2425============ 
    2526 
     27Choose an SVM learner suitable for the problem at hand. :obj:`SVMLearner` is a 
     28general SVM learner. Use :obj:`SVMLearnerSparse` to learn from data stored in 
     29the :obj:`Orange.data.Table` meta attributes. :obj:`SVMLearnerEasy` helps with 
     30data normalization and parameter tuning. Use the fast 
     31:obj:`LinearLearner` on data sets with a large number of features. 
     32 
     33How to use SVM learners (`svm-easy.py`_, uses: `vehicle.tab`_): 
     34     
     35.. literalinclude:: code/svm-easy.py 
     36 
     37:obj:`SVMLearnerEasy` with automatic data preprocessing and parameter tuning  
     38outperforms :obj:`SVMLearner` with the default nu and gamma parameters. 
     39 
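
A minimal sketch of that comparison (an editorial illustration, not part of this
changeset), assuming the `vehicle.tab`_ data set and the Orange 2.x evaluation
helpers Orange.evaluation.testing.cross_validation and
Orange.evaluation.scoring.CA are available:

    # Hedged sketch: compare a default SVMLearner against SVMLearnerEasy,
    # which scales the data and tunes the parameters before learning.
    import Orange
    from Orange.classification import svm

    data = Orange.data.Table("vehicle")
    default_svm = svm.SVMLearner()
    easy_svm = svm.SVMLearnerEasy()

    # cross_validation/CA names are assumed from Orange 2.x; adjust if needed
    results = Orange.evaluation.testing.cross_validation(
        [default_svm, easy_svm], data, folds=5)
    for name, ca in zip(["SVM default", "SVM easy"],
                        Orange.evaluation.scoring.CA(results)):
        print "%-12s CA: %.3f" % (name, ca)
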
    2640.. autoclass:: Orange.classification.svm.SVMLearner 
    2741   :members: 
     
    4559.. automethod:: Orange.classification.svm.table_to_svm_format 
    4660 
     61How to get linear SVM weights (`svm-linear-weights.py`_, 
     62uses: `brown-selected.tab`_): 
     63     
     64.. literalinclude:: code/svm-linear-weights.py     
     65 
    4766SVM-derived feature weights 
    4867--------------------------- 
    4968 
    50 .. autoclass:: Orange.classification.svm.MeasureAttribute_SVMWeights 
     69.. autoclass:: Orange.classification.svm.Score_SVMWeights 
    5170   :members: 
    5271 
     
    103122.. _iris.tab: code/iris.tab 
    104123.. _vehicle.tab: code/vehicle.tab 
     124 
    105125""" 
    106126 
     
    112132import Orange.data 
    113133import Orange.misc 
     134import Orange.feature 
    114135 
    115136import kernels 
     
    154175    return _orange__new_wrapped 
    155176 
    156 def max_nu(examples): 
     177def max_nu(data): 
    157178    """Return the maximum nu parameter for Nu_SVC support vector learning 
    158179     for the given data table.  
    159180     
     181    :param data: data table with a discrete class variable 
     182    :type data: Orange.data.Table 
     183     
    160184    """ 
    161185    nu = 1.0 
    162     dist = list(Orange.core.Distribution(examples.domain.classVar, examples)) 
     186    dist = list(Orange.core.Distribution(data.domain.classVar, data)) 
    163187    def pairs(seq): 
    164188        for i, n1 in enumerate(seq): 
     
    254278    max_nu = staticmethod(max_nu) 
    255279 
    256     def __call__(self, examples, weight=0): 
     280    def __call__(self, data, weight=0): 
    257281        """Construct a SVM classifier 
    258282         
    259         :param examples: data table with continuous features 
    260         :type examples: Orange.data.Table 
     283        :param data: data table with continuous features 
     284        :type data: Orange.data.Table 
    261285        :param weight: refer to `LibSVM documentation  
    262286            <http://http://www.csie.ntu.edu.tw/~cjlin/libsvm/>`_ 
    263287         
    264288        """ 
    265         examples = Orange.core.Preprocessor_dropMissingClasses(examples) 
     289         
     290        examples = Orange.core.Preprocessor_dropMissingClasses(data) 
    266291        if len(examples) == 0: 
    267292            raise ValueError("Example table is without any defined classes") 
     
    298323        return self.learnClassifier(examples) 
    299324 
    300     def learn_classifier(self, examples): 
     325    def learn_classifier(self, data): 
    301326        if self.normalization: 
    302             examples = self._normalize(examples) 
    303             svm = self.learner(examples) 
     327            data = self._normalize(data) 
     328            svm = self.learner(data) 
    304329#            if self.: 
    305330#                return SVMClassifierWrapper(svm) 
    306331#            else: 
    307332            return SVMClassifierWrapper(svm) 
    308         return self.learner(examples) 
     333        return self.learner(data) 
    309334 
    310335    @Orange.misc.deprecated_keywords({"progressCallback": "progress_callback"}) 
    311     def tune_parameters(self, examples, parameters=None, folds=5, verbose=0,  
     336    def tune_parameters(self, data, parameters=None, folds=5, verbose=0,  
    312337                       progress_callback=None): 
    313338        """Tune the parameters of the SVMLearner on given instances using  
    314339        cross validation. 
    315340         
    316         :param examples: data table on which to tune the parameters 
    317         :type examples: Orange.data.Table  
     341        :param data: data table on which to tune the parameters 
     342        :type data: Orange.data.Table  
    318343        :param parameters: if not set defaults to ["nu", "C", "gamma"] 
    319344        :type parameters: list of strings 
     
    327352        Example: 
    328353         
    329             >>> svm = SVMLearner() 
    330             >>> svm.tuneParameters(examples, parameters=["gamma"], folds=3) 
    331              
    332         This code tunes the `gamma` parameter on `examples` using 3-fold cross  
     354            >>> svm = Orange.classification.svm.SVMLearner() 
     355            >>> svm.tune_parameters(data, parameters=["gamma"], folds=3) 
     356             
     357        This code tunes the `gamma` parameter on `data` using 3-fold cross  
    333358        validation   
    334359         
    335360        """ 
     361         
    336362        import orngWrap 
     363         
    337364        parameters = ["nu", "C", "gamma"] if parameters == None else parameters 
    338365        searchParams = [] 
    339366        normalization = self.normalization 
    340367        if normalization: 
    341             examples = self._normalize(examples) 
     368            data = self._normalize(data) 
    342369            self.normalization = False 
    343370        if self.svm_type == SVMLearner.Nu_SVC and "nu" in parameters: 
    344371            numOfNuValues=9 
    345             max_nu = max(self.max_nu(examples) - 1e-7, 0.0) 
     372            max_nu = max(self.max_nu(data) - 1e-7, 0.0) 
    346373            searchParams.append(("nu", [i/10.0 for i in range(1, 9) if \ 
    347374                                        i/10.0 < max_nu] + [max_nu])) 
     
    356383                            progressCallback=progress_callback  
    357384                            if progress_callback else lambda i:None) 
    358         tunedLearner(examples, verbose=verbose) 
     385        tunedLearner(data, verbose=verbose) 
    359386        if normalization: 
    360387            self.normalization = normalization 
    361388 
    362     def _normalize(self, examples): 
     389    def _normalize(self, data): 
    363390        dc = Orange.core.DomainContinuizer() 
    364391        dc.classTreatment = Orange.core.DomainContinuizer.Ignore 
    365392        dc.continuousTreatment = Orange.core.DomainContinuizer.NormalizeBySpan 
    366393        dc.multinomialTreatment = Orange.core.DomainContinuizer.NValues 
    367         newdomain = dc(examples) 
    368         return examples.translate(newdomain) 
     394        newdomain = dc(data) 
     395        return data.translate(newdomain) 
    369396 
    370397SVMLearner = Orange.misc.deprecated_members({ 
     
    413440     
    414441    """Same as SVMLearner except that it learns from the  
    415         :obj:`Orange.data.Table` meta attributes. 
    416      
    417     .. note:: Note that meta attributes don't need to be registered with 
    418         the data-set domain, or present in all the instances. Use this if you 
    419         are learning from large sparse data-sets. 
     442    :obj:`Orange.data.Table` meta attributes. 
     443     
     444    Meta attributes do not need to be registered with the data set domain, or  
     445    present in all the instances. Use this if you are learning from a large  
     446    sparse data set. 
    420447     
    421448    """ 
     
    428455     
    429456    """Same as :obj:`SVMLearner` except that it will automatically scale the  
    430     data and perform parameter optimization using the :obj:`tuneParameters` 
    431     method similar to the easy.py script in LibSVM package. Use this if the 
    432     SVMLearner performs badly. 
    433      
    434     Example (`svm-easy.py`_ uses: `vehicle.tab`_) 
    435      
    436     .. literalinclude:: code/svm-easy.py 
     457    data and perform parameter optimization using the 
     458    :obj:`SVMLearner.tune_parameters` method, similar to the easy.py script in 
     459    the LibSVM package. Use this if :obj:`SVMLearner` performs badly. 
    437460     
    438461    """ 
     
    444467        self.learner = SVMLearner(**kwds) 
    445468         
    446     def learn_classifier(self, examples): 
     469    def learn_classifier(self, data): 
    447470        transformer=Orange.core.DomainContinuizer() 
    448471        transformer.multinomialTreatment=Orange.core.DomainContinuizer.NValues 
     
    450473            Orange.core.DomainContinuizer.NormalizeBySpan 
    451474        transformer.classTreatment=Orange.core.DomainContinuizer.Ignore 
    452         newdomain=transformer(examples) 
    453         newexamples=examples.translate(newdomain) 
     475        newdomain=transformer(data) 
     476        newexamples=data.translate(newdomain) 
    454477        #print newexamples[0] 
    455478        params={} 
     
    490513    solver_type == L2Loss_SVM_Dual  
    491514     
    492     .. note:: The default in Orange.core.LinearLearner is L2_LR 
     515    The default in Orange.core.LinearLearner is L2_LR. 
    493516     
    494517    """ 
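
A short sketch of the linear learner in use (editorial illustration; the
`weights` attribute of the resulting classifier is assumed from its use in
Score_SVMWeights below):

    # Hedged sketch: a fast linear SVM for data sets with many features.
    # This wrapper uses the L2Loss_SVM_Dual solver, while
    # Orange.core.LinearLearner itself defaults to L2_LR, as noted above.
    import Orange
    from Orange.classification import svm

    data = Orange.data.Table("brown-selected")
    classifier = svm.LinearLearner()(data)
    weights = classifier.weights   # one weight vector per binary classifier
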
     
    513536    """Extract attribute weights from the linear svm classifier. 
    514537     
    515     .. note:: For multi class classification the weights are square-summed 
    516         over all binary one vs. one classifiers. If you want weights for 
    517         each binary classifier pass `sum=False` flag (In this case the order 
    518         of reported weights are for class1 vs class2, class1 vs class3 ...  
    519         class2 vs class3 ... classifiers). 
    520          
    521     Example (`svm-linear-weights.py`_, uses: `brown-selected.tab`_) 
    522         .. literalinclude:: code/svm-linear-weights.py 
    523          
    524     """ 
     538    For multi-class classification the weights are square-summed over all binary 
     539    one vs. one classifiers. To get the weights of each binary classifier, pass 
     540    the `sum=False` flag (in this case the weights are reported in the order 
     541    class1 vs. class2, class1 vs. class3, ..., class2 vs. class3, ...). 
     542         
     543    """ 
     544     
    525545    def update_weights(w, key, val, mul): 
    526546        if key in w: 
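
A usage sketch for the weight extraction described above (editorial
illustration; it assumes the surrounding function is exposed as
get_linear_svm_weights, matching the svm-linear-weights.py example referenced
in the module documentation):

    # Hedged sketch: extract feature weights from a linear-kernel SVM.
    import Orange
    from Orange.classification.svm import SVMLearner, kernels, \
        get_linear_svm_weights

    data = Orange.data.Table("brown-selected")
    classifier = SVMLearner(kernel_type=kernels.Linear,
                            normalization=False)(data)
    weights = get_linear_svm_weights(classifier)              # summed over pairs
    per_pair = get_linear_svm_weights(classifier, sum=False)  # one per 1-vs-1 pair
    for feature, w in sorted(weights.items(), key=lambda fw: -fw[1])[:5]:
        print "%15s: %.3f" % (feature.name, w)
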
     
    584604exampleWeightedSum = example_weighted_sum 
    585605 
    586 class MeasureAttribute_SVMWeights(Orange.core.MeasureAttribute): 
    587      
    588     """Measure attribute relevance by training a linear SVM classifier on 
    589     provided instances, using a squared sum of weights (of each binary 
    590     classifier) as the returned measure. 
     606class Score_SVMWeights(Orange.feature.scoring.Score): 
     607     
     608    """Base: :obj:`Orange.feature.scoring.Score` 
     609     
     610    Score a feature by training a linear SVM classifier, using the squared sum of 
     611    weights (of each binary classifier) as the returned score. 
    591612         
    592613    Example: 
    593614     
    594         >>> measure = MeasureAttribute_SVMWeights() 
    595         >>> for attr in table.domain.attributes: 
    596             ...   print "%15s: %.3f" % (attr.name, measure(attr, data)) 
     615        >>> score = Score_SVMWeights() 
     616        >>> for feature in table.domain.features: 
     617            ...   print "%15s: %.3f" % (feature.name, score(feature, table)) 
    597618           
    598619    """ 
    599620     
    600     def __new__(cls, attr=None, examples=None, weightId=None, **kwargs): 
    601         self = Orange.core.MeasureAttribute.__new__(cls, **kwargs) 
    602         if examples is not None and attr is not None: 
     621    def __new__(cls, attr=None, data=None, weightId=None, **kwargs): 
     622        self = Orange.feature.scoring.Score.__new__(cls, **kwargs) 
     623        if data is not None and attr is not None: 
    603624            self.__init__(**kwargs) 
    604             return self.__call__(attr, examples, weightId) 
     625            return self.__call__(attr, data, weightId) 
    605626        else: 
    606627            return self 
    607628         
    608629    def __reduce__(self): 
    609         return MeasureAttribute_SVMWeights, (), {"learner": self.learner} 
     630        return Score_SVMWeights, (), {"learner": self.learner} 
    610631     
    611632    def __init__(self, learner=None, **kwargs): 
     
    623644        self._cached_examples = None 
    624645         
    625     def __call__(self, attr, examples, weightId=None): 
    626         if examples is self._cached_examples: 
     646    def __call__(self, attr, data, weightId=None): 
     647        if data is self._cached_examples: 
    627648            weights = self._cached_weights 
    628649        else: 
    629             classifier = self.learner(examples, weightId) 
    630             self._cached_examples = examples 
     650            classifier = self.learner(data, weightId) 
     651            self._cached_examples = data 
    631652            import numpy 
    632653            weights = numpy.array(classifier.weights) 
    633654            weights = numpy.sum(weights ** 2, axis=0) 
    634             weights = dict(zip(examples.domain.attributes, weights)) 
     655            weights = dict(zip(data.domain.attributes, weights)) 
    635656            self._cached_weights = weights 
    636657        return weights.get(attr, 0.0) 
    637658 
     659MeasureAttribute_SVMWeights = Score_SVMWeights 
     660 
    638661class RFE(object): 
    639662     
     
    643666    Example: 
    644667     
    645         >>> rfe = RFE(SVMLearner(kernel_type=kernels.Linear,  
    646         normalization=False)) # normalization=False -> SVM Learner should  
    647         not change the domain  
    648         >>> data_with_removed_features = rfe(data, 5) # returns an example  
    649         table with only 5 best attributes 
     668        >>> rfe = RFE(SVMLearner(kernel_type=kernels.Linear, \ 
     669normalization=False)) # normalization=False -> do not change the domain  
     670        >>> data_with_removed_features = rfe(table, 5) # table with 5 best attributes 
    650671         
    651672    """ 
     
    687708    @Orange.misc.deprecated_keywords({"numSelected": "num_selected", "progressCallback": "progress_callback"}) 
    688709    def __call__(self, data, num_selected=20, progress_callback=None): 
    689         """Return a new dataset with only `numSelected` best scoring attributes 
     710        """Return a new dataset with only the `num_selected` best scoring attributes 
    690711         
    691712        :param data: Data 
     
    709730    wrap_methods=["get_attr_scores", "__call__"])(RFE) 
    710731 
    711 def example_table_to_svm_format(examples, file): 
     732def example_table_to_svm_format(table, file): 
    712733    warnings.warn("Deprecated. Use table_to_svm_format", DeprecationWarning) 
    713     table_to_svm_format(examples, file) 
     734    table_to_svm_format(table, file) 
    714735 
    715736exampleTableToSVMFormat = example_table_to_svm_format 
    716737 
    717 def table_to_svm_format(examples, file): 
    718     """Save :obj:`Orange.data.Table` to a format used by LibSVM.""" 
    719     attrs = examples.domain.attributes + examples.domain.getmetas().values() 
     738def table_to_svm_format(data, file): 
     739    """Save :obj:`Orange.data.Table` to a format used by LibSVM. 
     740     
     741    :param data: the data table to save 
     742    :type data: Orange.data.Table 
     743    :param file: an open file object to write to 
     744    :type file: file 
     745     
     746    """ 
     747     
     748    attrs = data.domain.attributes + data.domain.getmetas().values() 
    720749    attrs = [attr for attr in attrs if attr.varType  
    721750             in [Orange.data.Type.Continuous,  
    722751                 Orange.data.Type.Discrete]] 
    723     cv = examples.domain.classVar 
    724      
    725     for ex in examples: 
     752    cv = data.domain.classVar 
     753     
     754    for ex in data: 
    726755        if cv.varType == Orange.data.Type.Discrete: 
    727756            file.write(str(int(ex[cv])))   
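
A usage sketch for table_to_svm_format (editorial illustration, assuming the
`iris.tab`_ data set referenced above):

    # Hedged sketch: write a data table in the sparse "label index:value"
    # format read by the LibSVM command line tools.
    import Orange
    from Orange.classification.svm import table_to_svm_format

    data = Orange.data.Table("iris")
    with open("iris.svm", "w") as f:
        table_to_svm_format(data, f)
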