Changeset 9011:cc169bc8d64a in orange
- Timestamp:
- 09/24/11 10:40:34 (20 months ago)
- Branch:
- default
- Convert:
- 1117da7ba84ca2090d5bafb25243b9b35ce74d67
- File:
-
- 1 edited
-
orange/Orange/classification/svm/__init__.py (modified) (20 diffs)
Legend:
- Unmodified
- Added
- Removed
-
orange/Orange/classification/svm/__init__.py
r8990 r9011 1 1 """ 2 .. index:: classification, support vector machines (SVM) 3 2 .. index:: support vector machines (SVM) 3 .. index: 4 single: classification; support vector machines (SVM) 5 4 6 ********************************* 5 Support vector machines (``svm``)7 Support Vector Machines (``svm``) 6 8 ********************************* 7 9 8 A collection ofclasses that wrap the `LibSVM library10 This module includes classes that wrap the `LibSVM library 9 11 <http://www.csie.ntu.edu.tw/~cjlin/libsvm/>`_, a library for `support vector 10 machines <http://en.wikipedia.org/wiki/Support_vector_machine>`_ (SVM). In this11 way SVMlearners from LibSVM behave like ordinary Orange learners and can be12 machines <http://en.wikipedia.org/wiki/Support_vector_machine>`_ (SVM). SVM 13 learners from LibSVM behave like ordinary Orange learners and can be 12 14 used as Python objects in training, classification and evaluation tasks. The 13 implementation supports the implementation of Python-based kernels, that can be 14 plugged-in into LibSVM implementations. 15 16 .. note:: On some data-sets SVM can perform very poorly. SVM can be very 17 sensitive to the proper choice of the parameters. If you are having 18 problems with the learner's accuracy try scaling the data and using 19 different parameters or choose an easier approach and use the \ 20 :obj:`SVMLearnerEasy` class which does this automatically (it is 21 similar to the `svm-easy.py`_ script in the LibSVM distribution). 15 implementation supports Python-based kernels, that can be plugged-in into the 16 LibSVM. 17 18 .. note:: SVM can perform poorly on some data sets. Choose the parameters 19 carefully. In case of low classification accuracy, try scaling the 20 data and different parameters. :obj:`SVMLearnerEasy` class does this 21 automatically (similar to the `svm-easy.py`_ script in the LibSVM 22 distribution). 22 23 23 24 SVM learners 24 25 ============ 25 26 27 Choose an SVM learner suitable for the problem at hand. 
:obj:`SVMLearner` is a 28 general SVM learner. Use :obj:`SVMLearnerSparse` to learn from the 29 :obj:`Orange.data.Table` meta attributes. :obj:`SVMLearnerEasy` will help with 30 the data normalization and parameter tuning. Learn with a fast 31 :obj:`LinearLearner` on data sets with large number of features. 32 33 How to use SVM learners (`svm-easy.py`_ uses: `vehicle.tab`_): 34 35 .. literalinclude:: code/svm-easy.py 36 37 :obj:`SVMLearnerEasy` with automatic data preprocessing and parameter tuning 38 outperforms :obj:`SVMLearner` with the default nu and gamma parameters. 39 26 40 .. autoclass:: Orange.classification.svm.SVMLearner 27 41 :members: … … 45 59 .. automethod:: Orange.classification.svm.table_to_svm_format 46 60 61 How to get linear SVM weights (`svm-linear-weights.py`_, 62 uses: `brown-selected.tab`_): 63 64 .. literalinclude:: code/svm-linear-weights.py 65 47 66 SVM-derived feature weights 48 67 --------------------------- 49 68 50 .. autoclass:: Orange.classification.svm. MeasureAttribute_SVMWeights69 .. autoclass:: Orange.classification.svm.Score_SVMWeights 51 70 :members: 52 71 … … 103 122 .. _iris.tab: code/iris.tab 104 123 .. _vehicle.tab: code/vehicle.tab 124 105 125 """ 106 126 … … 112 132 import Orange.data 113 133 import Orange.misc 134 import Orange.feature 114 135 115 136 import kernels … … 154 175 return _orange__new_wrapped 155 176 156 def max_nu( examples):177 def max_nu(data): 157 178 """Return the maximum nu parameter for Nu_SVC support vector learning 158 179 for the given data table. 
159 180 181 :param data: data table with continuous features 182 :type data: Orange.data.Table 183 160 184 """ 161 185 nu = 1.0 162 dist = list(Orange.core.Distribution( examples.domain.classVar, examples))186 dist = list(Orange.core.Distribution(data.domain.classVar, data)) 163 187 def pairs(seq): 164 188 for i, n1 in enumerate(seq): … … 254 278 max_nu = staticmethod(max_nu) 255 279 256 def __call__(self, examples, weight=0):280 def __call__(self, data, weight=0): 257 281 """Construct a SVM classifier 258 282 259 :param examples: data table with continuous features260 :type examples: Orange.data.Table283 :param table: data table with continuous features 284 :type table: Orange.data.Table 261 285 :param weight: refer to `LibSVM documentation 262 286 <http://www.csie.ntu.edu.tw/~cjlin/libsvm/>`_ 263 287 264 288 """ 265 examples = Orange.core.Preprocessor_dropMissingClasses(examples) 289 290 examples = Orange.core.Preprocessor_dropMissingClasses(data) 266 291 if len(examples) == 0: 267 292 raise ValueError("Example table is without any defined classes") … … 298 323 return self.learnClassifier(examples) 299 324 300 def learn_classifier(self, examples):325 def learn_classifier(self, data): 301 326 if self.normalization: 302 examples = self._normalize(examples)303 svm = self.learner( examples)327 data = self._normalize(data) 328 svm = self.learner(data) 304 329 # if self.: 305 330 # return SVMClassifierWrapper(svm) 306 331 # else: 307 332 return SVMClassifierWrapper(svm) 308 return self.learner( examples)333 return self.learner(data) 309 334 310 335 @Orange.misc.deprecated_keywords({"progressCallback": "progress_callback"}) 311 def tune_parameters(self, examples, parameters=None, folds=5, verbose=0,336 def tune_parameters(self, data, parameters=None, folds=5, verbose=0, 312 337 progress_callback=None): 313 338 """Tune the parameters of the SVMLearner on given instances using 314 339 cross validation. 
315 340 316 :param examples: data table on which to tune the parameters317 :type examples: Orange.data.Table341 :param data: data table on which to tune the parameters 342 :type data: Orange.data.Table 318 343 :param parameters: if not set defaults to ["nu", "C", "gamma"] 319 344 :type parameters: list of strings … … 327 352 Example: 328 353 329 >>> svm = SVMLearner()330 >>> svm.tune Parameters(examples, parameters=["gamma"], folds=3)331 332 This code tunes the `gamma` parameter on ` examples` using 3-fold cross354 >>> svm = Orange.classification.svm.SVMLearner() 355 >>> svm.tune_parameters(table, parameters=["gamma"], folds=3) 356 357 This code tunes the `gamma` parameter on `data` using 3-fold cross 333 358 validation 334 359 335 360 """ 361 336 362 import orngWrap 363 337 364 parameters = ["nu", "C", "gamma"] if parameters == None else parameters 338 365 searchParams = [] 339 366 normalization = self.normalization 340 367 if normalization: 341 examples = self._normalize(examples)368 data = self._normalize(data) 342 369 self.normalization = False 343 370 if self.svm_type == SVMLearner.Nu_SVC and "nu" in parameters: 344 371 numOfNuValues=9 345 max_nu = max(self.max_nu( examples) - 1e-7, 0.0)372 max_nu = max(self.max_nu(data) - 1e-7, 0.0) 346 373 searchParams.append(("nu", [i/10.0 for i in range(1, 9) if \ 347 374 i/10.0 < max_nu] + [max_nu])) … … 356 383 progressCallback=progress_callback 357 384 if progress_callback else lambda i:None) 358 tunedLearner( examples, verbose=verbose)385 tunedLearner(data, verbose=verbose) 359 386 if normalization: 360 387 self.normalization = normalization 361 388 362 def _normalize(self, examples):389 def _normalize(self, data): 363 390 dc = Orange.core.DomainContinuizer() 364 391 dc.classTreatment = Orange.core.DomainContinuizer.Ignore 365 392 dc.continuousTreatment = Orange.core.DomainContinuizer.NormalizeBySpan 366 393 dc.multinomialTreatment = Orange.core.DomainContinuizer.NValues 367 newdomain = dc( examples)368 return 
examples.translate(newdomain)394 newdomain = dc(data) 395 return data.translate(newdomain) 369 396 370 397 SVMLearner = Orange.misc.deprecated_members({ … … 413 440 414 441 """Same as SVMLearner except that it learns from the 415 :obj:`Orange.data.Table` meta attributes.416 417 .. note:: Note that meta attributes don't need to be registered with418 the data-set domain, or present in all the instances. Use this if you419 are learning from large sparse data-sets.442 :obj:`Orange.data.Table` meta attributes. 443 444 Meta attributes do not need to be registered with the data set domain, or 445 present in all the instances. Use this if you are learning from a large 446 sparse data set. 420 447 421 448 """ … … 428 455 429 456 """Same as :obj:`SVMLearner` except that it will automatically scale the 430 data and perform parameter optimization using the :obj:`tuneParameters` 431 method similar to the easy.py script in LibSVM package. Use this if the 432 SVMLearner performs badly. 433 434 Example (`svm-easy.py`_ uses: `vehicle.tab`_) 435 436 .. literalinclude:: code/svm-easy.py 457 data and perform parameter optimization using the 458 :obj:`SVMLearner.tune_parameters` method. Similar to the easy.py script in 459 LibSVM package. Use this if the SVMLearner performs badly. 437 460 438 461 """ … … 444 467 self.learner = SVMLearner(**kwds) 445 468 446 def learn_classifier(self, examples):469 def learn_classifier(self, data): 447 470 transformer=Orange.core.DomainContinuizer() 448 471 transformer.multinomialTreatment=Orange.core.DomainContinuizer.NValues … … 450 473 Orange.core.DomainContinuizer.NormalizeBySpan 451 474 transformer.classTreatment=Orange.core.DomainContinuizer.Ignore 452 newdomain=transformer( examples)453 newexamples= examples.translate(newdomain)475 newdomain=transformer(data) 476 newexamples=data.translate(newdomain) 454 477 #print newexamples[0] 455 478 params={} … … 490 513 solver_type == L2Loss_SVM_Dual 491 514 492 .. 
note::The default in Orange.core.LinearLearner is L2_LR515 The default in Orange.core.LinearLearner is L2_LR 493 516 494 517 """ … … 513 536 """Extract attribute weights from the linear svm classifier. 514 537 515 .. note:: For multi class classification the weights are square-summed 516 over all binary one vs. one classifiers. If you want weights for 517 each binary classifier pass `sum=False` flag (In this case the order 518 of reported weights are for class1 vs class2, class1 vs class3 ... 519 class2 vs class3 ... classifiers). 520 521 Example (`svm-linear-weights.py`_, uses: `brown-selected.tab`_) 522 .. literalinclude:: code/svm-linear-weights.py 523 524 """ 538 For multi class classification the weights are square-summed over all binary 539 one vs. one classifiers. If you want weights for each binary classifier pass 540 `sum=False` flag (In this case the order of reported weights are for class1 541 vs class2, class1 vs class3 ... class2 vs class3 ... classifiers). 542 543 """ 544 525 545 def update_weights(w, key, val, mul): 526 546 if key in w: … … 584 604 exampleWeightedSum = example_weighted_sum 585 605 586 class MeasureAttribute_SVMWeights(Orange.core.MeasureAttribute): 587 588 """Measure attribute relevance by training a linear SVM classifier on 589 provided instances, using a squared sum of weights (of each binary 590 classifier) as the returned measure. 606 class Score_SVMWeights(Orange.feature.scoring.Score): 607 608 """Base: :obj:`Orange.feature.scoring.Score` 609 610 Score feature by training a linear SVM classifier, using a squared sum of 611 weights (of each binary classifier) as the returned score. 591 612 592 613 Example: 593 614 594 >>> measure = MeasureAttribute_SVMWeights()595 >>> for attr in table.domain.attributes:596 ... print "%15s: %.3f" % ( attr.name, measure(attr, data))615 >>> score = Score_SVMWeights() 616 >>> for feature in table.domain.features: 617 ... 
print "%15s: %.3f" % (feature.name, score(feature, table)) 597 618 598 619 """ 599 620 600 def __new__(cls, attr=None, examples=None, weightId=None, **kwargs):601 self = Orange. core.MeasureAttribute.__new__(cls, **kwargs)602 if examplesis not None and attr is not None:621 def __new__(cls, attr=None, data=None, weightId=None, **kwargs): 622 self = Orange.feature.scoring.Score.__new__(cls, **kwargs) 623 if data is not None and attr is not None: 603 624 self.__init__(**kwargs) 604 return self.__call__(attr, examples, weightId)625 return self.__call__(attr, data, weightId) 605 626 else: 606 627 return self 607 628 608 629 def __reduce__(self): 609 return MeasureAttribute_SVMWeights, (), {"learner": self.learner}630 return Score_SVMWeights, (), {"learner": self.learner} 610 631 611 632 def __init__(self, learner=None, **kwargs): … … 623 644 self._cached_examples = None 624 645 625 def __call__(self, attr, examples, weightId=None):626 if examplesis self._cached_examples:646 def __call__(self, attr, data, weightId=None): 647 if data is self._cached_examples: 627 648 weights = self._cached_weights 628 649 else: 629 classifier = self.learner( examples, weightId)630 self._cached_examples = examples650 classifier = self.learner(data, weightId) 651 self._cached_examples = data 631 652 import numpy 632 653 weights = numpy.array(classifier.weights) 633 654 weights = numpy.sum(weights ** 2, axis=0) 634 weights = dict(zip( examples.domain.attributes, weights))655 weights = dict(zip(data.domain.attributes, weights)) 635 656 self._cached_weights = weights 636 657 return weights.get(attr, 0.0) 637 658 659 MeasureAttribute_SVMWeights = Score_SVMWeights 660 638 661 class RFE(object): 639 662 … … 643 666 Example: 644 667 645 >>> rfe = RFE(SVMLearner(kernel_type=kernels.Linear, 646 normalization=False)) # normalization=False -> SVM Learner should 647 not change the domain 648 >>> data_with_removed_features = rfe(data, 5) # returns an example 649 table with only 5 best attributes 668 >>> 
rfe = RFE(SVMLearner(kernel_type=kernels.Linear, \ 669 normalization=False)) # normalization=False -> do not change the domain 670 >>> data_with_removed_features = rfe(table, 5) # table with 5 best attributes 650 671 651 672 """ … … 687 708 @Orange.misc.deprecated_keywords({"numSelected": "num_selected", "progressCallback": "progress_callback"}) 688 709 def __call__(self, data, num_selected=20, progress_callback=None): 689 """Return a new dataset with only `num Selected` best scoring attributes710 """Return a new dataset with only `num_selected` best scoring attributes 690 711 691 712 :param data: Data … … 709 730 wrap_methods=["get_attr_scores", "__call__"])(RFE) 710 731 711 def example_table_to_svm_format( examples, file):732 def example_table_to_svm_format(table, file): 712 733 warnings.warn("Deprecated. Use table_to_svm_format", DeprecationWarning) 713 table_to_svm_format( examples, file)734 table_to_svm_format(table, file) 714 735 715 736 exampleTableToSVMFormat = example_table_to_svm_format 716 737 717 def table_to_svm_format(examples, file): 718 """Save :obj:`Orange.data.Table` to a format used by LibSVM.""" 719 attrs = examples.domain.attributes + examples.domain.getmetas().values() 738 def table_to_svm_format(data, file): 739 """Save :obj:`Orange.data.Table` to a format used by LibSVM. 740 741 :param data: Data 742 :type data: Orange.data.Table 743 :param file: file pointer 744 :type file: file 745 746 """ 747 748 attrs = data.domain.attributes + data.domain.getmetas().values() 720 749 attrs = [attr for attr in attrs if attr.varType 721 750 in [Orange.data.Type.Continuous, 722 751 Orange.data.Type.Discrete]] 723 cv = examples.domain.classVar724 725 for ex in examples:752 cv = data.domain.classVar 753 754 for ex in data: 726 755 if cv.varType == Orange.data.Type.Discrete: 727 756 file.write(str(int(ex[cv])))
Note: See TracChangeset
for help on using the changeset viewer.
