Changeset 10682:289189dd68d9 in orange
 Timestamp:
 03/29/12 13:44:21 (2 years ago)
 Branch:
 default
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

Orange/classification/svm/__init__.py
r10679 r10682 777 777 raise TypeError("Can only learn a discrete class.") 778 778 779 if data.domain.has_discrete_attributes( ) or self.normalization:779 if data.domain.has_discrete_attributes(False) or self.normalization: 780 780 dc = Orange.data.continuization.DomainContinuizer() 781 781 dc.multinomial_treatment = dc.NValues … … 820 820 raise TypeError("Can only learn a discrete class.") 821 821 822 if data.domain.has_discrete_attributes( ) or self.normalization:822 if data.domain.has_discrete_attributes(False) or self.normalization: 823 823 dc = Orange.data.continuization.DomainContinuizer() 824 824 dc.multinomial_treatment = dc.NValues … … 856 856 SVs = classifier.support_vectors 857 857 class_var = SVs.domain.class_var 858 858 859 859 if classifier.svm_type in [SVMLearner.C_SVC, SVMLearner.Nu_SVC]: 860 860 weights = [] … … 869 869 SVs = bin_classifier.support_vectors 870 870 w = {} 871 871 872 872 for coef, sv_ind in bin_classifier.coef[0]: 873 873 SV = SVs[sv_ind] … … 877 877 if attr.varType == Orange.feature.Type.Continuous: 878 878 update_weights(w, attr, to_float(SV[attr]), coef) 879 879 880 880 weights.append(w) 881 881 if sum: … … 888 888 weights = dict(scores) 889 889 else: 890 # raise TypeError("SVM classification model expected.")891 890 weights = {} 892 891 for coef, sv_ind in classifier.coef[0]: … … 897 896 if attr.varType == Orange.feature.Type.Continuous: 898 897 update_weights(weights, attr, to_float(SV[attr]), coef) 899 898 900 899 return weights 901 900 902 901 getLinearSVMWeights = get_linear_svm_weights 903 902 … … 912 911 class ScoreSVMWeights(Orange.feature.scoring.Score): 913 912 """ 914 Score a feature by the squared sum of weights usinga linear SVM915 classifier.913 Score a feature using squares of weights of a linear SVM 914 model. 916 915 917 916 Example: 918 917 919 918 >>> score = Orange.classification.svm.ScoreSVMWeights() 920 >>> for feature in table.domain.features: 921 ... print "%35s: %.3f" % (feature.name, score(feature, table)) 922 compactness : 0.019 923 circularity : 0.025 924 distance circularity : 0.007 925 radius ratio : 0.010 926 pr.axis aspect ratio : 0.076 927 max.length aspect ratio : 0.010 928 scatter ratio : 0.046 929 elongatedness : 0.095 930 pr.axis rectangularity : 0.006 931 max.length rectangularity : 0.030 932 scaled variance along major axis : 0.001 933 scaled variance along minor axis : 0.001 934 scaled radius of gyration : 0.002 935 skewness about major axis : 0.004 936 skewness about minor axis : 0.003 937 kurtosis about minor axis : 0.001 938 kurtosis about major axis : 0.060 939 hollows ratio : 0.029 940 941 942 """ 919 >>> svm_scores = [(score(f, table), f) for f in table.domain.features] 920 >>> for feature_score, feature in sorted(svm_scores, reverse=True): 921 ... print "%35s: %.3f" % (feature.name, feature_score) 922 kurtosis about major axis : 47.113 923 pr.axis aspect ratio : 44.949 924 max.length rectangularity : 39.748 925 radius ratio : 29.098 926 scatter ratio : 26.133 927 skewness about major axis : 24.403 928 compactness : 20.432 929 hollows ratio : 20.109 930 max.length aspect ratio : 15.757 931 scaled radius of gyration : 15.242 932 scaled variance along minor axis : 14.289 933 pr.axis rectangularity : 9.882 934 circularity : 8.293 935 distance circularity : 7.785 936 scaled variance along major axis : 6.179 937 elongatedness : 4.038 938 skewness about minor axis : 1.351 939 kurtosis about minor axis : 0.760 940 941 """ 942 943 handles_discrete = True 944 handles_continuous = True 945 computes_thresholds = False 946 needs = Orange.feature.scoring.Score.Generator 943 947 944 948 def __new__(cls, attr=None, data=None, weight_id=None, **kwargs): … … 955 959 def __init__(self, learner=None, **kwargs): 956 960 """ 957 :param learner: Learner used for weight estimation 958 (default LinearSVMLearner(solver_type=L2Loss_SVM_Dual)) 961 :param learner: Learner used for weight estimation 962 (by default ``LinearSVMLearner(solver_type=L2R_L2LOSS_DUAL, C=1.0)`` 963 will be used for classification problems and 964 ``SVMLearner(svm_type=Epsilon_SVR, kernel_type=Linear, C=1.0, p=0.25)`` 965 for regression problems. 966 959 967 :type learner: Orange.core.LinearLearner 960 968 961 969 """ 962 if learner: 963 self.learner = learner 964 else: 965 self.learner = LinearSVMLearner(solver_type= 966 LinearSVMLearner.L2R_L2LOSS_DUAL) 967 970 self.learner = learner 968 971 self._cached_examples = None 969 972 970 973 def __call__(self, attr, data, weight_id=None): 974 if attr not in data.domain.attributes: 975 raise ValueError("Feature %r is not from the domain." % attr) 976 977 if self.learner is not None: 978 learner = self.learner 979 elif isinstance(data.domain.class_var, variable.Discrete): 980 learner = LinearSVMLearner(solver_type= 981 LinearSVMLearner.L2R_L2LOSS_DUAL, 982 C=1.0) 983 elif isinstance(data.domain.class_var, variable.Continuous): 984 learner = SVMLearner(svm_type=SVMLearner.Epsilon_SVR, 985 kernel_type=kernels.Linear, 986 C=1.0, p=0.25) 987 else: 988 raise TypeError("Cannot handle the class variable type %r" % \ 989 type(data.domain.class_var)) 990 971 991 if data is self._cached_examples: 972 992 weights = self._cached_weights 973 993 else: 974 classifier = self.learner(data, weight_id)994 classifier = learner(data, weight_id) 975 995 self._cached_examples = data 976 import numpy 977 weights = numpy.array(classifier.weights) 978 weights = numpy.sum(weights ** 2, axis=0) 979 weights = dict(zip(data.domain.attributes, weights)) 996 weights = self._extract_weights(classifier, data.domain.attributes) 980 997 self._cached_weights = weights 981 998 return weights.get(attr, 0.0) 999 1000 def _extract_weights(self, classifier, original_features): 1001 """Extract weights from a svm classifer (``SVMClassifier`` or a 1002 ``LinearLearner`` instance). 1003 1004 """ 1005 import numpy as np 1006 if isinstance(classifier, SVMClassifier): 1007 weights = get_linear_svm_weights(classifier, sum=True) 1008 if isinstance(classifier.class_var, variable.Continuous): 1009 # The weights are in the the original non squared form 1010 weights = dict((f, w ** 2) for f, w in weights.items()) 1011 elif isinstance(classifier, Orange.core.LinearClassifier): 1012 weights = np.array(classifier.weights) 1013 weights = np.sum(weights ** 2, axis=0) 1014 weights = dict(zip(classifier.domain.attributes, weights)) 1015 else: 1016 raise TypeError("Don't know how to use classifier type %r" % \ 1017 type(classifier)) 1018 1019 # collect dummy variables that were created for discrete features 1020 sources = self._collect_source(weights.keys()) 1021 source_weights = dict.fromkeys(original_features, 0.0) 1022 for f in original_features: 1023 if f not in weights and f in sources: 1024 dummys = sources[f] 1025 # Use averege weight 1026 source_weights[f] = np.average([weights[d] for d in dummys]) 1027 else: 1028 raise ValueError(f) 1029 1030 return source_weights 1031 1032 def _collect_source(self, vars): 1033 """ Given a list of variables ``var``, return a mapping from source 1034 variables (``source_variable`` or ``get_value_from.variable`` members) 1035 back to the variables in ``vars``. 1036 1037 """ 1038 source = defaultdict(list) 1039 for var in vars: 1040 svar = None 1041 if var.source_variable: 1042 source[var.source_variable].append(var) 1043 elif isinstance(var.get_value_from, Orange.core.ClassifierFromVar): 1044 source[var.get_value_from.variable].append(var) 1045 elif isinstance(var.get_value_from, Orange.core.ImputeClassifier): 1046 source[var.get_value_from.classifier_from_var.variable].append(var) 1047 else: 1048 source[var].append(var) 1049 return dict(source) 982 1050 983 1051 MeasureAttribute_SVMWeights = ScoreSVMWeights
Note: See TracChangeset
for help on using the changeset viewer.