Changeset 10300:28e3a1a057c6 in orange
 Timestamp:
 02/18/12 14:23:36 (2 years ago)
 Branch:
 default
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

Orange/classification/svm/__init__.py
r10133 r10300 160 160 SVMClassifier, \ 161 161 SVMClassifierSparse 162 162 163 163 from Orange.preprocess import Preprocessor_impute, \ 164 164 Preprocessor_continuize, \ … … 182 182 def pairs(seq): 183 183 for i, n1 in enumerate(seq): 184 for n2 in seq[i +1:]:184 for n2 in seq[i + 1:]: 185 185 yield n1, n2 186 186 return min([2.0 * min(n1, n2) / (n1 + n2) for n1, n2 in pairs(dist) \ 187 if n1 != 0 and n2 != 0] + [nu])188 187 if n1 != 0 and n2 != 0] + [nu]) 188 189 189 maxNu = max_nu 190 190 191 191 class SVMLearner(_SVMLearner): 192 192 """ … … 242 242 """ 243 243 __new__ = _orange__new__(_SVMLearner) 244 244 245 245 C_SVC = _SVMLearner.C_SVC 246 246 Nu_SVC = _SVMLearner.Nu_SVC … … 248 248 Nu_SVR = _SVMLearner.Nu_SVR 249 249 Epsilon_SVR = _SVMLearner.Epsilon_SVR 250 250 251 251 @Orange.misc.deprecated_keywords({"kernelFunc": "kernel_func"}) 252 def __init__(self, svm_type=Nu_SVC, kernel_type=kernels.RBF, 253 kernel_func=None, C=1.0, nu=0.5, p=0.1, gamma=0.0, degree=3, 254 coef0=0, shrinking=True, probability=True, verbose=False, 252 def __init__(self, svm_type=Nu_SVC, kernel_type=kernels.RBF, 253 kernel_func=None, C=1.0, nu=0.5, p=0.1, gamma=0.0, degree=3, 254 coef0=0, shrinking=True, probability=True, verbose=False, 255 255 cache_size=200, eps=0.001, normalization=True, 256 weight= (), **kwargs):256 weight=[], **kwargs): 257 257 self.svm_type = svm_type 258 258 self.kernel_type = kernel_type … … 287 287 288 288 """ 289 289 290 290 examples = Orange.core.Preprocessor_dropMissingClasses(data) 291 291 class_var = examples.domain.class_var 292 292 if len(examples) == 0: 293 293 raise ValueError("Example table is without any defined classes") 294 294 295 295 # Fix the svm_type parameter if we have a class_var/svm_type mismatch 296 if self.svm_type in [0, 1] and \296 if self.svm_type in [0, 1] and \ 297 297 isinstance(class_var, Orange.feature.Continuous): 298 298 self.svm_type += 3 299 299 #raise AttributeError, "Cannot learn a discrete classifier from non descrete class data. Use EPSILON_SVR or NU_SVR for regression" 300 if self.svm_type in [3, 4] and \300 if self.svm_type in [3, 4] and \ 301 301 isinstance(class_var, Orange.feature.Discrete): 302 302 self.svm_type = 3 … … 304 304 if self.kernel_type == kernels.Custom and not self.kernel_func: 305 305 raise ValueError("Custom kernel function not supplied") 306 306 307 307 import warnings 308 308 309 309 nu = self.nu 310 310 if self.svm_type == SVMLearner.Nu_SVC: #is nu feasible 311 max_nu = self.max_nu(examples)311 max_nu = self.max_nu(examples) 312 312 if self.nu > max_nu: 313 313 if getattr(self, "verbose", 0): … … 315 315 Setting nu to %.3f" % (self.nu, max_nu)) 316 316 nu = max(max_nu  1e7, 0.0) 317 318 for name in ["svm_type", "kernel_type", "kernel_func", "C", "nu", "p", 319 "gamma", "degree", "coef0", "shrinking", "probability", 317 318 for name in ["svm_type", "kernel_type", "kernel_func", "C", "nu", "p", 319 "gamma", "degree", "coef0", "shrinking", "probability", 320 320 "verbose", "cache_size", "eps"]: 321 321 setattr(self.learner, name, getattr(self, name)) 322 322 self.learner.nu = nu 323 323 self.learner.set_weights(self.weight) 324 324 325 if self.svm_type == SVMLearner.OneClass and self.probability: 325 326 self.learner.probability = False … … 335 336 336 337 @Orange.misc.deprecated_keywords({"progressCallback": "progress_callback"}) 337 def tune_parameters(self, data, parameters=None, folds=5, verbose=0, 338 def tune_parameters(self, data, parameters=None, folds=5, verbose=0, 338 339 progress_callback=None): 339 340 """Tune the ``parameters`` on given ``data`` using … … 358 359 359 360 """ 360 361 361 362 import orngWrap 362 363 363 364 if parameters is None: 364 365 parameters = ["nu", "C", "gamma"] 365 366 366 367 searchParams = [] 367 368 normalization = self.normalization … … 371 372 if self.svm_type in [SVMLearner.Nu_SVC, SVMLearner.Nu_SVR] \ 372 373 and "nu" in parameters: 373 numOfNuValues =9374 numOfNuValues = 9 374 375 if isinstance(data.domain.class_var, variable.Discrete): 375 376 max_nu = max(self.max_nu(data)  1e7, 0.0) 376 377 else: 377 378 max_nu = 1.0 378 searchParams.append(("nu", [i /10.0 for i in range(1, 9) if \379 i /10.0 < max_nu] + [max_nu]))379 searchParams.append(("nu", [i / 10.0 for i in range(1, 9) if \ 380 i / 10.0 < max_nu] + [max_nu])) 380 381 elif "C" in parameters: 381 searchParams.append(("C", [2 **a for a in range(5,15,2)]))382 if self.kernel_type ==2 and "gamma" in parameters:383 searchParams.append(("gamma", [2 **a for a in range(5,5,2)]+[0]))382 searchParams.append(("C", [2 ** a for a in range(5, 15, 2)])) 383 if self.kernel_type == 2 and "gamma" in parameters: 384 searchParams.append(("gamma", [2 ** a for a in range(5, 5, 2)] + [0])) 384 385 tunedLearner = orngWrap.TuneMParameters(object=self, 385 parameters=searchParams, 386 folds=folds, 387 returnWhat=orngWrap.TuneMParameters.returnLearner, 388 progressCallback=progress_callback 386 parameters=searchParams, 387 folds=folds, 388 returnWhat=orngWrap.TuneMParameters.returnLearner, 389 progressCallback=progress_callback 389 390 if progress_callback else lambda i:None) 390 391 tunedLearner(data, verbose=verbose) … … 401 402 402 403 SVMLearner = Orange.misc.deprecated_members({ 403 "learnClassifier": "learn_classifier", 404 "learnClassifier": "learn_classifier", 404 405 "tuneParameters": "tune_parameters", 405 406 "kernelFunc" : "kernel_func", … … 410 411 def __new__(cls, wrapped): 411 412 return Orange.core.SVMClassifier.__new__(cls, name=wrapped.name) 412 413 413 414 def __init__(self, wrapped): 414 415 self.wrapped = wrapped 415 416 for name, val in wrapped.__dict__.items(): 416 417 self.__dict__[name] = val 417 418 418 419 def __call__(self, example, what=Orange.core.GetValue): 419 420 example = Orange.data.Instance(self.wrapped.domain, example) 420 421 return self.wrapped(example, what) 421 422 422 423 def class_distribution(self, example): 423 424 example = Orange.data.Instance(self.wrapped.domain, example) 424 425 return self.wrapped.class_distribution(example) 425 426 426 427 def get_decision_values(self, example): 427 428 example = Orange.data.Instance(self.wrapped.domain, example) 428 429 return self.wrapped.get_decision_values(example) 429 430 430 431 def get_model(self): 431 432 return self.wrapped.get_model() 432 433 433 434 def __reduce__(self): 434 435 return SVMClassifierWrapper, (self.wrapped,), dict([(name, val) \ 435 436 for name, val in self.__dict__.items() \ 436 437 if name not in self.wrapped.__dict__]) 437 438 438 439 SVMClassifierWrapper = Orange.misc.deprecated_members({ 439 "classDistribution": "class_distribution", 440 "classDistribution": "class_distribution", 440 441 "getDecisionValues": "get_decision_values", 441 442 "getModel" : "get_model", 442 443 })(SVMClassifierWrapper) 443 444 444 445 class SVMLearnerSparse(SVMLearner): 445 446 446 447 """A :class:`SVMLearner` that learns from 447 448 meta attributes. … … 452 453 453 454 """ 454 455 455 456 @Orange.misc.deprecated_keywords({"useNonMeta": "use_non_meta"}) 456 457 def __init__(self, **kwds): … … 458 459 self.use_non_meta = kwds.get("use_non_meta", False) 459 460 self.learner = Orange.core.SVMLearnerSparse(**kwds) 460 461 461 462 def _normalize(self, data): 462 463 if self.use_non_meta: … … 470 471 471 472 class SVMLearnerEasy(SVMLearner): 472 473 473 474 """Apart from the functionality of :obj:`SVMLearner` it automatically scales the 474 475 data and perform parameter optimization with the … … 477 478 478 479 """ 479 480 480 481 def __init__(self, **kwds): 481 self.folds =4482 self.verbose =0482 self.folds = 4 483 self.verbose = 0 483 484 SVMLearner.__init__(self, **kwds) 484 485 self.learner = SVMLearner(**kwds) 485 486 486 487 def learn_classifier(self, data): 487 transformer =Orange.core.DomainContinuizer()488 transformer.multinomialTreatment =Orange.core.DomainContinuizer.NValues489 transformer.continuousTreatment = \488 transformer = Orange.core.DomainContinuizer() 489 transformer.multinomialTreatment = Orange.core.DomainContinuizer.NValues 490 transformer.continuousTreatment = \ 490 491 Orange.core.DomainContinuizer.NormalizeBySpan 491 transformer.classTreatment =Orange.core.DomainContinuizer.Ignore492 newdomain =transformer(data)493 newexamples =data.translate(newdomain)492 transformer.classTreatment = Orange.core.DomainContinuizer.Ignore 493 newdomain = transformer(data) 494 newexamples = data.translate(newdomain) 494 495 #print newexamples[0] 495 params ={}496 params = {} 496 497 parameters = [] 497 498 self.learner.normalization = False ## Normalization already done 498 499 if self.svm_type in [1, 4]:500 numOfNuValues =9499 500 if self.svm_type in [1, 4]: 501 numOfNuValues = 9 501 502 if self.svm_type == SVMLearner.Nu_SVC: 502 503 max_nu = max(self.max_nu(newexamples)  1e7, 0.0) 503 504 else: 504 505 max_nu = 1.0 505 parameters.append(("nu", [i /10.0 for i in range(1, 9) \506 if i /10.0 < max_nu] + [max_nu]))506 parameters.append(("nu", [i / 10.0 for i in range(1, 9) \ 507 if i / 10.0 < max_nu] + [max_nu])) 507 508 else: 508 parameters.append(("C", [2 **a for a in range(5,15,2)]))509 if self.kernel_type ==2:510 parameters.append(("gamma", [2 **a for a in range(5,5,2)]+[0]))509 parameters.append(("C", [2 ** a for a in range(5, 15, 2)])) 510 if self.kernel_type == 2: 511 parameters.append(("gamma", [2 ** a for a in range(5, 5, 2)] + [0])) 511 512 import orngWrap 512 tunedLearner = orngWrap.TuneMParameters(object=self.learner, 513 parameters=parameters, 513 tunedLearner = orngWrap.TuneMParameters(object=self.learner, 514 parameters=parameters, 514 515 folds=self.folds) 515 516 516 517 return SVMClassifierWrapper(tunedLearner(newexamples, 517 518 verbose=self.verbose)) … … 533 534 class LinearSVMLearner(Orange.core.LinearLearner): 534 535 """Train a linear SVM model.""" 535 536 536 537 L2R_L2LOSS_DUAL = Orange.core.LinearLearner.L2R_L2Loss_SVC_Dual 537 L2R_L2LOSS = Orange.core.LinearLearner.L2R_L2Loss_SVC 538 L2R_L2LOSS = Orange.core.LinearLearner.L2R_L2Loss_SVC 538 539 L2R_L1LOSS_DUAL = Orange.core.LinearLearner.L2R_L1Loss_SVC_Dual 539 540 L2R_L1LOSS_DUAL = Orange.core.LinearLearner.L2R_L2Loss_SVC_Dual 540 541 L1R_L2LOSS = Orange.core.LinearLearner.L1R_L2Loss_SVC 541 542 542 543 __new__ = _orange__new__(base=Orange.core.LinearLearner) 543 544 544 545 def __init__(self, solver_type=L2R_L2LOSS_DUAL, C=1.0, eps=0.01, **kwargs): 545 546 """ … … 568 569 pass 569 570 # raise ValueError("Invalid solver_type parameter.") 570 571 571 572 self.preproc = default_preprocessor() 572 573 573 574 def __call__(self, instances, weight_id=None): 574 575 instances = self.preproc(instances) 575 576 classifier = super(LinearSVMLearner, self).__call__(instances, weight_id) 576 577 return classifier 577 578 578 579 LinearLearner = LinearSVMLearner 579 580 … … 582 583 """ 583 584 __new__ = _orange__new__(base=Orange.core.LinearLearner) 584 585 585 586 def __init__(self, C=1.0, eps=0.01, **kwargs): 586 587 """\ … … 596 597 for name, val in kwargs.items(): 597 598 setattr(self, name, val) 598 599 599 600 self.solver_type = self.MCSVM_CS 600 601 self.preproc = default_preprocessor() 601 602 602 603 def __call__(self, instances, weight_id=None): 603 604 instances = self.preproc(instances) … … 617 618 618 619 """ 619 620 620 621 def update_weights(w, key, val, mul): 621 622 if key in w: 622 w[key] +=mul*val623 w[key] += mul * val 623 624 else: 624 w[key] =mul*val625 625 w[key] = mul * val 626 626 627 def to_float(val): 627 return float(val) if not val.isSpecial() else 0.0 628 629 SVs =classifier.support_vectors628 return float(val) if not val.isSpecial() else 0.0 629 630 SVs = classifier.support_vectors 630 631 weights = [] 631 632 632 633 class_var = SVs.domain.class_var 633 634 if classifier.svm_type in [SVMLearner.C_SVC, SVMLearner.Nu_SVC]: … … 638 639 sv_ranges = [(0, classifier.nSV[0])] 639 640 for n in classifier.nSV[1:]: 640 sv_ranges.append((sv_ranges[1][1], sv_ranges[1][1] +n))641 sv_ranges.append((sv_ranges[1][1], sv_ranges[1][1] + n)) 641 642 else: 642 643 sv_ranges = [(0, len(SVs))] 643 644 644 645 for i in range(len(classes)  1): 645 for j in range(i +1, len(classes)):646 for j in range(i + 1, len(classes)): 646 647 w = {} 647 648 coef_ind = j  1 … … 653 654 update_weights(w, attr, to_float(SVs[sv_ind][attr]), \ 654 655 classifier.coef[coef_ind][sv_ind]) 655 coef_ind =i656 coef_ind = i 656 657 for sv_ind in range(*sv_ranges[j]): 657 658 attributes = SVs.domain.attributes + \ 658 659 SVs[sv_ind].getmetas(False, Orange.feature.Descriptor).keys() 659 660 for attr in attributes: 660 if attr.varType ==Orange.feature.Type.Continuous:661 if attr.varType == Orange.feature.Type.Continuous: 661 662 update_weights(w, attr, to_float(SVs[sv_ind][attr]), \ 662 663 classifier.coef[coef_ind][sv_ind]) 663 664 weights.append(w) 664 665 665 666 if sum: 666 667 scores = defaultdict(float) 667 668 668 669 for w in weights: 669 670 for attr, w_attr in w.items(): 670 scores[attr] += w_attr **2671 scores[attr] += w_attr ** 2 671 672 for key in scores: 672 673 scores[key] = math.sqrt(scores[key]) … … 678 679 679 680 def example_weighted_sum(example, weights): 680 sum =0681 sum = 0 681 682 for attr, w in weights.items(): 682 683 sum += float(example[attr]) * w 683 684 return sum 684 685 685 686 exampleWeightedSum = example_weighted_sum 686 687 … … 714 715 715 716 """ 716 717 717 718 def __new__(cls, attr=None, data=None, weight_id=None, **kwargs): 718 719 self = Orange.feature.scoring.Score.__new__(cls, **kwargs) … … 722 723 else: 723 724 return self 724 725 725 726 def __reduce__(self): 726 727 return ScoreSVMWeights, (), dict(self.__dict__) 727 728 728 729 def __init__(self, learner=None, **kwargs): 729 730 """ … … 734 735 """ 735 736 if learner: 736 self.learner = learner 737 self.learner = learner 737 738 else: 738 739 self.learner = LinearSVMLearner(solver_type= 739 740 LinearSVMLearner.L2R_L2LOSS_DUAL) 740 741 741 742 self._cached_examples = None 742 743 743 744 def __call__(self, attr, data, weight_id=None): 744 745 if data is self._cached_examples: … … 757 758 758 759 class RFE(object): 759 760 760 761 """Recursive feature elimination using linear SVM derived attribute 761 762 weights. … … 772 773 773 774 """ 774 775 775 776 def __init__(self, learner=None): 776 777 self.learner = learner or SVMLearner(kernel_type= … … 787 788 attrs = data.domain.attributes 788 789 attrScores = {} 789 790 790 791 while len(attrs) > stop_at: 791 792 weights = get_linear_svm_weights(self.learner(data), sum=False) … … 795 796 for w in weights: 796 797 for attr, wAttr in w.items(): 797 score[attr] += wAttr **2798 score[attr] += wAttr ** 2 798 799 score = score.items() 799 score.sort(lambda a, b:cmp(a[1],b[1]))800 numToRemove = max(int(len(attrs) *1.0/(iter+1)), 1)800 score.sort(lambda a, b:cmp(a[1], b[1])) 801 numToRemove = max(int(len(attrs) * 1.0 / (iter + 1)), 1) 801 802 for attr, s in score[:numToRemove]: 802 803 attrScores[attr] = len(attrScores) … … 806 807 iter += 1 807 808 return attrScores 808 809 809 810 @Orange.misc.deprecated_keywords({"numSelected": "num_selected", "progressCallback": "progress_callback"}) 810 811 def __call__(self, data, num_selected=20, progress_callback=None): … … 819 820 scores = self.get_attr_scores(data, progress_callback=progress_callback) 820 821 scores = sorted(scores.items(), key=lambda item: item[1]) 821 822 822 823 scores = dict(scores[num_selected:]) 823 824 attrs = [attr for attr in data.domain.attributes if attr in scores] … … 846 847 847 848 """ 848 849 849 850 attrs = data.domain.attributes + data.domain.getmetas().values() 850 attrs = [attr for attr in attrs if attr.varType 851 in [Orange.feature.Type.Continuous, 851 attrs = [attr for attr in attrs if attr.varType 852 in [Orange.feature.Type.Continuous, 852 853 Orange.feature.Type.Discrete]] 853 854 cv = data.domain.classVar 854 855 855 856 for ex in data: 856 857 if cv.varType == Orange.feature.Type.Discrete: 857 file.write(str(int(ex[cv]))) 858 file.write(str(int(ex[cv]))) 858 859 else: 859 860 file.write(str(float(ex[cv]))) 860 861 861 862 for i, attr in enumerate(attrs): 862 863 if not ex[attr].isSpecial(): 863 file.write(" " +str(i+1)+":"+str(float(ex[attr])))864 file.write(" " + str(i + 1) + ":" + str(float(ex[attr]))) 864 865 file.write("\n") 865 866 866 867 tableToSVMFormat = table_to_svm_format
Note: See TracChangeset
for help on using the changeset viewer.