Changeset 10618:f55f1477bd5f in orange
 Timestamp:
 03/23/12 14:32:50 (2 years ago)
 Branch:
 default
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

Orange/classification/svm/__init__.py
r10585 r10618 2 2 3 3 from collections import defaultdict 4 from operator import add 4 5 5 6 import Orange.core … … 15 16 from Orange.core import LinearClassifier, \ 16 17 LinearLearner, \ 17 SVMClassifier , \18 SVMClassifierSparse 18 SVMClassifier as _SVMClassifier, \ 19 SVMClassifierSparse as _SVMClassifierSparse 19 20 20 21 from Orange.data import preprocess … … 177 178 data = self._normalize(data) 178 179 svm = self.learner(data) 179 return SVMClassifier Wrapper(svm)180 return SVMClassifier(svm) 180 181 181 182 @Orange.utils.deprecated_keywords({"progressCallback": "progress_callback"}) … … 253 254 wrap_methods=["__init__", "tune_parameters"])(SVMLearner) 254 255 255 class SVMClassifierWrapper(Orange.core.SVMClassifier): 256 def __new__(cls, wrapped): 257 return Orange.core.SVMClassifier.__new__(cls, name=wrapped.name) 256 class SVMClassifier(_SVMClassifier): 257 def __new__(cls, *args, **kwargs): 258 if args and isinstance(args[0], _SVMClassifier): 259 # Will wrap a C++ object 260 return _SVMClassifier.__new__(cls, name=args[0].name) 261 elif args and isinstance(args[0], variable.Descriptor): 262 # The constructor call for the C++ object. 
263 # This is a hack to support loading of old pickled classifiers 264 return _SVMClassifier.__new__(_SVMClassifier, *args, **kwargs) 265 else: 266 raise ValueError 258 267 259 268 def __init__(self, wrapped): 260 self.wrapped = wrapped 261 for name, val in wrapped.__dict__.items(): 262 self.__dict__[name] = val 263 264 def __call__(self, example, what=Orange.core.GetValue): 265 example = Orange.data.Instance(self.wrapped.domain, example) 266 return self.wrapped(example, what) 267 268 def class_distribution(self, example): 269 example = Orange.data.Instance(self.wrapped.domain, example) 270 return self.wrapped.class_distribution(example) 271 272 def get_decision_values(self, example): 273 example = Orange.data.Instance(self.wrapped.domain, example) 274 dec_values = self.wrapped.get_decision_values(example) 275 # decision values are ordred by libsvm internal class values 276 # i.e. the order of labels in the data 277 map = self._get_libsvm_labels_map() 278 n_class = len(self.class_var.values) 279 new_values = [] 269 self.class_var = wrapped.class_var 270 self.domain = wrapped.domain 271 self.computes_probabilities = wrapped.computes_probabilities 272 self.examples = wrapped.examples 273 self.svm_type = wrapped.svm_type 274 self.kernel_func = wrapped.kernel_func 275 self.kernel_type = wrapped.kernel_type 276 self.__wrapped = wrapped 277 278 assert(type(wrapped) in [_SVMClassifier, _SVMClassifierSparse]) 279 280 if self.svm_type in [SVMLearner.C_SVC, SVMLearner.Nu_SVC]: 281 # Reorder the support vectors 282 label_map = self._get_libsvm_labels_map() 283 start = 0 284 support_vectors = [] 285 for n in wrapped.n_SV: 286 support_vectors.append(wrapped.support_vectors[start: start + n]) 287 start += n 288 support_vectors = [support_vectors[i] for i in label_map] 289 self.support_vectors = Orange.data.Table(reduce(add, support_vectors)) 290 else: 291 self.support_vectors = wrapped.support_vectors 292 293 @property 294 def coef(self): 295 """Coefficients of the underlying 
binary 1vs1 classifiers. 296 297 This is a #Classes * (#Classses - 1) list of lists where 298 each sublist contains tuples of (alpha, support_vector_index) 299 300 """ 301 # We need to reorder the coef values 302 # see http://www.csie.ntu.edu.tw/~cjlin/libsvm/faq.html#f804 303 # for more information on how the coefs are stored by libsvm 304 # internally. 305 306 import numpy as np 307 c_map = self._get_libsvm_bin_classifier_map() 308 label_map = self._get_libsvm_labels_map() 309 libsvm_coef = self.__wrapped.coef 310 coef = [] #[None] * len(c_map) 311 n_class = len(label_map) 312 n_SV = self.__wrapped.n_SV 313 coef_array = np.array(self.__wrapped.coef) 314 p = 0 315 libsvm_class_indices = np.cumsum([0] + list(n_SV), dtype=int) 316 class_indices = np.cumsum([0] + list(self.n_SV), dtype=int) 280 317 for i in range(n_class - 1): 281 318 for j in range(i + 1, n_class): 282 # Internal indices 283 ni, nj = map.index(i), map.index(j) 284 mult = 1.0 319 ni = label_map[i] 320 nj = label_map[j] 321 bc_index, mult = c_map[p] 322 285 323 if ni > nj: 286 324 ni, nj = nj, ni 287 # Multiply by -1 if we switch the order of the 1vs1 288 # classifier.
289 mult = -1.0 290 val_index = n_class * (n_class - 1) / 2 - (n_class - ni - 1) * (n_class - ni - 2) / 2 - (n_class - nj) 291 new_values.append(mult * dec_values[val_index]) 292 return Orange.core.FloatList(new_values) 325 326 # Original class indices 327 c1_range = range(libsvm_class_indices[ni], 328 libsvm_class_indices[ni + 1]) 329 c2_range = range(libsvm_class_indices[nj], 330 libsvm_class_indices[nj + 1]) 331 332 coef1 = mult * coef_array[nj - 1, c1_range] 333 coef2 = mult * coef_array[ni, c2_range] 334 335 # Mapped class indices 336 c1_range = range(class_indices[i], 337 class_indices[i + 1]) 338 c2_range = range(class_indices[j], 339 class_indices[j + 1]) 340 if mult == -1.0: 341 c1_range, c2_range = c2_range, c1_range 342 343 nonzero1 = np.abs(coef1) > 0.0 344 nonzero2 = np.abs(coef2) > 0.0 345 346 coef1 = coef1[nonzero1] 347 coef2 = coef2[nonzero2] 348 349 c1_range = [sv_i for sv_i, nz in zip(c1_range, nonzero1) if nz] 350 c2_range = [sv_i for sv_i, nz in zip(c2_range, nonzero2) if nz] 351 352 coef.append(list(zip(coef1, c1_range)) + list(zip(coef2, c2_range))) 353 354 p += 1 355 return coef 356 357 @property 358 def rho(self): 359 """Constant (bias) terms in each underlying binary 1vs1 classifier. 360 """ 361 c_map = self._get_libsvm_bin_classifier_map() 362 rho = self.__wrapped.rho 363 return [rho[i] * m for i, m in c_map] 364 365 @property 366 def n_SV(self): 367 """Number of support vectors for each class. 368 """ 369 if self.__wrapped.n_SV is not None: 370 c_map = self._get_libsvm_labels_map() 371 n_SV= self.__wrapped.n_SV 372 return [n_SV[i] for i in c_map] 373 else: 374 return None 375 376 @property 377 def prob_a(self): 378 if self.__wrapped.prob_a is not None: 379 c_map = self._get_libsvm_bin_classifier_map() 380 prob_a = self.__wrapped.prob_a 381 # TODO: What about order switch?
382 return [prob_a[i] for i, _ in c_map] 383 else: 384 return None 385 386 @property 387 def prob_b(self): 388 if self.__wrapped.prob_b is not None: 389 c_map = self._get_libsvm_bin_classifier_map() 390 prob_b = self.__wrapped.prob_b 391 # TODO: What about order switch? 392 return [prob_b[i] for i, _ in c_map] 393 else: 394 return None 395 396 def __call__(self, instance, what=Orange.core.GetValue): 397 """Classify a new ``instance`` 398 """ 399 instance = Orange.data.Instance(self.domain, instance) 400 return self.__wrapped(instance, what) 401 402 def class_distribution(self, instance): 403 """Return a class distribution for the ``instance`` 404 """ 405 instance = Orange.data.Instance(self.domain, instance) 406 return self.__wrapped.class_distribution(instance) 407 408 def get_decision_values(self, instance): 409 """Return the decision values of the binary 1vs1 410 classifiers for the ``instance`` (:class:`~Orange.data.Instance`). 411 412 """ 413 instance = Orange.data.Instance(self.domain, instance) 414 dec_values = self.__wrapped.get_decision_values(instance) 415 # decision values are ordered by libsvm internal class values 416 # i.e. the order of labels in the data 417 c_map = self._get_libsvm_bin_classifier_map() 418 return [dec_values[i] * m for i, m in c_map] 293 419 294 420 def get_model(self): 295 return self.wrapped.get_model() 421 """Return a string representing the model in the libsvm model format. 422 """ 423 return self.__wrapped.get_model() 296 424 297 425 def _get_libsvm_labels_map(self): 298 """Get the libsvm label mapping from the model string299 """ 300 labels = [line for line in self. get_model().splitlines() \426 """Get the internal libsvm label mapping. 
427 """ 428 labels = [line for line in self.__wrapped.get_model().splitlines() \ 301 429 if line.startswith("label")] 302 430 labels = labels[0].split(" ")[1:] if labels else ["0"] 303 return [int(label) for label in labels] 304 431 labels = [int(label) for label in labels] 432 return [labels.index(i) for i in range(len(labels))] 433 434 def _get_libsvm_bin_classifier_map(self): 435 """Return the libsvm binary classifier mapping (due to label ordering) 436 """ 437 label_map = self._get_libsvm_labels_map() 438 bin_c_map = [] 439 n_class = len(self.class_var.values) 440 p = 0 441 for i in range(n_class - 1): 442 for j in range(i + 1, n_class): 443 ni = label_map[i] 444 nj = label_map[j] 445 mult = 1 446 if ni > nj: 447 ni, nj = nj, ni 448 mult = -1 449 # classifier index 450 cls_index = n_class * (n_class - 1) / 2 - (n_class - ni - 1) * (n_class - ni - 2) / 2 - (n_class - nj) 451 bin_c_map.append((cls_index, mult)) 452 return bin_c_map 453 305 454 def __reduce__(self): 306 return SVMClassifierWrapper, (self.wrapped,), dict([(name, val) \ 307 for name, val in self.__dict__.items() \ 308 if name not in self.wrapped.__dict__]) 309 455 return SVMClassifier, (self.__wrapped,), dict(self.__dict__) 456 310 457 def get_binary_classifier(self, c1, c2): 311 458 """Return a binary classifier for classes `c1` and `c2`.
… … 317 464 c1 = int(self.class_var(c1)) 318 465 c2 = int(self.class_var(c2)) 319 320 libsvm_label = [line for line in self.get_model().splitlines() \ 321 if line.startswith("label")] 322 466 323 467 n_class = len(self.class_var.values) 324 468 … … 329 473 (self.class_var.values[c1], self.class_var.values[c2]), 330 474 values=["0", "1"]) 331 332 # Map the libsvm labels333 labels_map = self._get_libsvm_labels_map()334 c1 = labels_map.index(c1)335 c2 = labels_map.index(c2)336 475 337 476 mult = 1.0 … … 339 478 c1, c2 = c2, c1 340 479 mult = -1.0 341 342 # Index of the 1vs1 binary classifier 480 343 481 classifier_i = n_class * (n_class - 1) / 2 - (n_class - c1 - 1) * (n_class - c1 - 2) / 2 - (n_class - c2) 344 482 345 # Indices for classes in the coef structure. 346 class_indices = np.cumsum([0] + list(self.n_SV), dtype=int) 347 c1_range = range(class_indices[c1], class_indices[c1 + 1]) 348 c2_range = range(class_indices[c2], class_indices[c2 + 1]) 349 350 coef_array = np.array(self.coef) 351 coef1 = mult * coef_array[c2 - 1, c1_range] 352 coef2 = mult * coef_array[c1, c2_range] 353 354 # Support vectors for the binary classifier 355 sv1 = [self.support_vectors[i] for i in c1_range] 356 sv2 = [self.support_vectors[i] for i in c2_range] 357 358 # Rho for the classifier 483 coef = self.coef[classifier_i] 484 485 coef1 = [(mult * alpha, sv_i) for alpha, sv_i in coef \ 486 if int(self.support_vectors[sv_i].get_class()) == c1] 487 coef2 = [(mult * alpha, sv_i) for alpha, sv_i in coef \ 488 if int(self.support_vectors[sv_i].get_class()) == c2] 489 359 490 rho = mult * self.rho[classifier_i] 360 491 361 # Filter non zero support vectors362 nonzero1 = np.abs(coef1) > 0.0363 nonzero2 = np.abs(coef2) > 0.0364 365 coef1 = coef1[nonzero1]366 coef2 = coef2[nonzero2]367 368 sv1 = [sv for sv, nz in zip(sv1, nonzero1) if nz]369 sv2 = [sv for sv, nz in zip(sv2, nonzero2) if nz]370 371 sv_indices1 = [i for i, nz in zip(c1_range, nonzero1) if nz]372 sv_indices2 = [i for i, nz in zip(c2_range, 
nonzero2) if nz]373 374 model = self._binary_libsvm_model(bin_class_var, [coef1, coef2],375 [rho], sv_indices1 + sv_indices2)376 377 all_sv = Orange.data.Table(sv1 + sv2)492 model = self._binary_libsvm_model_string(bin_class_var, 493 [coef1, coef2], 494 [rho]) 495 496 all_sv = [self.support_vectors[sv_i] \ 497 for c, sv_i in coef1 + coef2] 498 499 all_sv = Orange.data.Table(all_sv) 500 501 svm_classifier_type = type(self.__wrapped) 502 503 # Build args for svm_classifier_type constructor 504 args = (bin_class_var, self.examples, all_sv, model) 505 506 if isinstance(svm_classifier_type, _SVMClassifierSparse): 507 args = args + (int(self.__wrapped.use_non_meta),) 508 378 509 if self.kernel_type == kernels.Custom: 379 classifier = SVMClassifier(bin_class_var, self.examples, 380 all_sv, model, self.kernel_func) 381 else: 382 classifier = SVMClassifier(bin_class_var, self.examples, 383 all_sv, model) 510 args = args + (self.kernel_func,) 384 511 385 return SVMClassifierWrapper(classifier) 386 387 def _binary_libsvm_model(self, class_var, coefs, rho, sv_indices): 388 """Return a libsvm formated model string for binary subclassifier 512 native_classifier = svm_classifier_type(*args) 513 return SVMClassifier(native_classifier) 514 515 def _binary_libsvm_model_string(self, class_var, coef, rho): 516 """Return a libsvm formated model string for binary classifier 389 517 """ 390 518 import itertools 391 519 392 520 model = [] 393 521 394 522 # Take the model up to nr_classes 395 for line in self.get_model().splitlines(): 523 libsvm_model = self.__wrapped.get_model() 524 for line in libsvm_model.splitlines(): 396 525 if line.startswith("nr_class"): 397 526 break … … 400 529 401 530 model.append("nr_class %i" % len(class_var.values)) 402 model.append("total_sv %i" % len(sv_indices))531 model.append("total_sv %i" % reduce(add, [len(c) for c in coef])) 403 532 model.append("rho " + " ".join(str(r) for r in rho)) 404 533 model.append("label " + " ".join(str(i) for i in 
range(len(class_var.values)))) 405 534 # No probA and probB 406 535 407 model.append("nr_sv " + " ".join(str(len(c)) for c in coef s))536 model.append("nr_sv " + " ".join(str(len(c)) for c in coef)) 408 537 model.append("SV") 409 538 … … 414 543 return " ".join("%i:%f" % (i + 1, v) for i, v in values) 415 544 545 def sparse_instance_to_svm(inst): 546 non_meta = [] 547 base = 1 548 if self.__wrapped.use_non_meta: 549 non_meta = [instance_to_svm(inst)] 550 base += len(inst.domain) 551 metas = [] 552 for m_id, value in sorted(inst.get_metas().items(), reverse=True): 553 if not value.isSpecial() and float(value) != 0: 554 metas.append("%i:%f" % (base - m_id, float(value))) 555 return " ".join(non_meta + metas) 556 557 if isinstance(self.__wrapped, _SVMClassifierSparse): 558 converter = sparse_instance_to_svm 559 else: 560 converter = instance_to_svm(inst) 561 416 562 if self.kernel_type == kernels.Custom: 417 SV = self.get_model().split("SV\n", 1)[1]418 # Get the sv indices (the last entry in the SV entrys)563 SV = libsvm_model.split("SV\n", 1)[1] 564 # Get the sv indices (the last entry in the SV lines) 419 565 indices = [int(s.split(":")[1]) for s in SV.splitlines() if s.strip()] 420 for c, sv_i in zip(itertools.chain(*coefs), itertools.chain(sv_indices)): 566 567 # Reorder the indices 568 label_map = self._get_libsvm_labels_map() 569 start = 0 570 reordered_indices = [] 571 for n in self.__wrapped.n_SV: 572 reordered_indices.append(indices[start: start + n]) 573 start += n 574 reordered_indices = [reordered_indices[i] for i in label_map] 575 indices = reduce(add, reordered_indices) 576 577 for (c, sv_i) in itertools.chain(*coef): 421 578 model.append("%f 0:%i" % (c, indices[sv_i])) 422 579 else: 423 for c, sv_i in zip(itertools.chain(*coefs), itertools.chain(sv_indices)):424 model.append("%f %s" % (c, instance_to_svm(self.support_vectors[sv_i])))580 for (c, sv_i) in itertools.chain(*coef): 581 model.append("%f %s" % (c, converter(self.support_vectors[sv_i]))) 425 582 
426 583 model.append("") … … 428 585 429 586 430 SVMClassifier Wrapper= Orange.utils.deprecated_members({587 SVMClassifier = Orange.utils.deprecated_members({ 431 588 "classDistribution": "class_distribution", 432 589 "getDecisionValues": "get_decision_values", 433 590 "getModel" : "get_model", 434 })(SVMClassifierWrapper) 591 }, wrap_methods=[])(SVMClassifier) 592 593 # Backwards compatibility (pickling) 594 SVMClassifierWrapper = SVMClassifier 435 595 436 596 class SVMLearnerSparse(SVMLearner): … … 503 663 folds=self.folds) 504 664 505 return SVMClassifier Wrapper(tunedLearner(newexamples,665 return SVMClassifier(tunedLearner(newexamples, 506 666 verbose=self.verbose)) 507 667 … … 630 790 w = {} 631 791 632 for SV, alpha in zip(SVs, bin_classifier.coef[0]): 792 for alpha, sv_ind in bin_classifier.coef[0]: 793 SV = SVs[sv_ind] 633 794 attributes = SVs.domain.attributes + \ 634 795 SV.getmetas(False, Orange.feature.Descriptor).keys()
Note: See TracChangeset
for help on using the changeset viewer.