Changeset 7675:bdeb8ed36b09 in orange
 Timestamp:
 02/14/11 23:15:29 (3 years ago)
 Branch:
 default
 Convert:
 ce19a3511cee200ab613c6555b1537bb124f8753
 Location:
 orange
 Files:

 2 edited
Legend:
 Unmodified
 Added
 Removed

orange/Orange/classification/rules.py
r7660 r7675 565 565 import random 566 566 import math 567 from orngABCN2 import ABCN2568 567 569 568 … … 960 959 r.filterAndStore(oldInstances,weight,r.classifier.defaultVal) 961 960 return classifier 961 962 963 964 # Main ABCN2 class 965 class ABCN2(Orange.core.RuleLearner): 966 """COPIED&PASTED FROM orngABCN2  REFACTOR AND DOCUMENT ASAP! 967 This is implementation of ABCN2 + EVC as evaluation + LRC classification. 968 """ 969 970 def __init__(self, argumentID=0, width=5, m=2, opt_reduction=2, nsampling=100, max_rule_complexity=5, 971 rule_sig=1.0, att_sig=1.0, postpruning=None, min_quality=0., min_coverage=1, min_improved=1, min_improved_perc=0.0, 972 learn_for_class = None, learn_one_rule = False, evd=None, evd_arguments=None, prune_arguments=False, analyse_argument=1, 973 alternative_learner = None, min_cl_sig = 0.5, min_beta = 0.0, set_prefix_rules = False, add_sub_rules = False, 974 **kwds): 975 """ 976 Parameters: 977 General rule learning: 978 width ... beam width (default 5) 979 learn_for_class ... learner rules for one class? otherwise None 980 learn_one_rule ... learn one rule only ? 981 analyse_argument ... learner only analyses argument with this index; if set to 1, then it learns normally 982 983 Evaluator related: 984 m ... mestimate to be corrected with EVC (default 2) 985 opt_reduction ... types of EVC correction; 0=no correction, 1=pessimistic, 2=normal (default 2) 986 nsampling ... number of samples in estimating extreme value distribution (for EVC) (default 100) 987 evd ... pre given extreme value distributions 988 evd_arguments ... pre given extreme value distributions for arguments 989 990 Rule Validation: 991 rule_sig ... minimal rule significance (default 1.0) 992 att_sig ... minimal attribute significance in rule (default 1.0) 993 max_rule_complexity ... maximum number of conditions in rule (default 5) 994 min_coverage ... minimal number of covered examples (default 5) 995 996 Probabilistic covering: 997 min_improved ... 
minimal number of examples improved in probabilistic covering (default 1) 998 min_improved_perc ... minimal percentage of covered examples that need to be improved (default 0.0) 999 1000 Classifier (LCR) related: 1001 add_sub_rules ... add sub rules ? (default False) 1002 min_cl_sig ... minimal significance of beta in classifier (default 0.5) 1003 min_beta ... minimal beta value (default 0.0) 1004 set_prefix_rules ... should ordered prefix rules be added? (default False) 1005 alternative_learner ... use rulelearner as a correction method for other machine learning methods. (default None) 1006 1007 """ 1008 1009 1010 # argument ID which is passed to abcn2 learner 1011 self.argumentID = argumentID 1012 # learn for specific class only? 1013 self.learn_for_class = learn_for_class 1014 # only analysing a specific argument or learning all at once 1015 self.analyse_argument = analyse_argument 1016 # should we learn only one rule? 1017 self.learn_one_rule = learn_one_rule 1018 self.postpruning = postpruning 1019 # rule finder 1020 self.ruleFinder = Orange.core.RuleBeamFinder() 1021 self.ruleFilter = Orange.core.RuleBeamFilter_Width(width=width) 1022 self.ruleFilter_arguments = ABBeamFilter(width=width) 1023 if max_rule_complexity  1 < 0: 1024 max_rule_complexity = 10 1025 self.ruleFinder.ruleStoppingValidator = Orange.core.RuleValidator_LRS(alpha = 1.0, min_quality = 0., max_rule_complexity = max_rule_complexity  1, min_coverage=min_coverage) 1026 self.refiner = Orange.core.RuleBeamRefiner_Selector() 1027 self.refiner_arguments = SelectorAdder(discretizer = Orange.core.EntropyDiscretization(forceAttribute = 1, 1028 maxNumberOfIntervals = 2)) 1029 self.prune_arguments = prune_arguments 1030 # evc evaluator 1031 evdGet = Orange.core.EVDistGetter_Standard() 1032 self.ruleFinder.evaluator = Orange.core.RuleEvaluator_mEVC(m=m, evDistGetter = evdGet, min_improved = min_improved, min_improved_perc = min_improved_perc) 1033 self.ruleFinder.evaluator.returnExpectedProb = True 1034 
self.ruleFinder.evaluator.optimismReduction = opt_reduction 1035 self.ruleFinder.evaluator.ruleAlpha = rule_sig 1036 self.ruleFinder.evaluator.attributeAlpha = att_sig 1037 self.ruleFinder.evaluator.validator = Orange.core.RuleValidator_LRS(alpha = 1.0, min_quality = min_quality, min_coverage=min_coverage, max_rule_complexity = max_rule_complexity  1) 1038 1039 # learn stopping criteria 1040 self.ruleStopping = None 1041 self.dataStopping = Orange.core.RuleDataStoppingCriteria_NoPositives() 1042 # evd fitting 1043 self.evd_creator = EVDFitter(self,n=nsampling) 1044 self.evd = evd 1045 self.evd_arguments = evd_arguments 1046 # classifier 1047 self.add_sub_rules = add_sub_rules 1048 self.classifier = PILAR(alternative_learner = alternative_learner, min_cl_sig = min_cl_sig, min_beta = min_beta, set_prefix_rules = set_prefix_rules) 1049 # arbitrary parameters 1050 self.__dict__.update(kwds) 1051 1052 1053 def __call__(self, examples, weightID=0): 1054 # initialize progress bar 1055 progress=getattr(self,"progressCallback",None) 1056 if progress: 1057 progress.start = 0.0 1058 progress.end = 0.0 1059 distrib = Orange.core.Distribution(examples.domain.classVar, examples, weightID) 1060 distrib.normalize() 1061 1062 # we begin with an empty set of rules 1063 all_rules = Orange.core.RuleList() 1064 1065 # th en, iterate through all classes and learn rule for each class separately 1066 for cl_i,cl in enumerate(examples.domain.classVar): 1067 if progress: 1068 step = distrib[cl] / 2. 1069 progress.start = progress.end 1070 progress.end += step 1071 1072 if self.learn_for_class and not self.learn_for_class in [cl,cl_i]: 1073 continue 1074 1075 # rules for this class only 1076 rules, arg_rules = Orange.core.RuleList(), Orange.core.RuleList() 1077 1078 # create dichotomous class 1079 dich_data = self.create_dich_class(examples, cl) 1080 1081 # preparation of the learner (covering, evd, etc.) 
1082 self.prepare_settings(dich_data, weightID, cl_i, progress) 1083 1084 # learn argumented rules first ... 1085 self.turn_ABML_mode(dich_data, weightID, cl_i) 1086 # first specialize all unspecialized arguments 1087 # dich_data = self.specialise_arguments(dich_data, weightID) 1088 # comment: specialisation of arguments is within learning of an argumented rule; 1089 # this is now different from the published algorithm 1090 if progress: 1091 progress.start = progress.end 1092 progress.end += step 1093 1094 aes = self.get_argumented_examples(dich_data) 1095 aes = self.sort_arguments(aes, dich_data) 1096 while aes: 1097 if self.analyse_argument > 1 and not dich_data[self.analyse_argument] == aes[0]: 1098 aes = aes[1:] 1099 continue 1100 ae = aes[0] 1101 rule = self.learn_argumented_rule(ae, dich_data, weightID) # target class is always first class (0) 1102 if not progress: 1103 print "learned rule", Orange.classification.rules.ruleToString(rule) 1104 if rule: 1105 arg_rules.append(rule) 1106 aes = filter(lambda x: not rule(x), aes) 1107 else: 1108 aes = aes[1:] 1109 if not progress: 1110 print " arguments finished ... 
" 1111 1112 # remove all examples covered by rules 1113 ## for rule in rules: 1114 ## dich_data = self.remove_covered_examples(rule, dich_data, weightID) 1115 ## if progress: 1116 ## progress(self.remaining_probability(dich_data),None) 1117 1118 # learn normal rules on remaining examples 1119 if self.analyse_argument == 1: 1120 self.turn_normal_mode(dich_data, weightID, cl_i) 1121 while dich_data: 1122 # learn a rule 1123 rule = self.learn_normal_rule(dich_data, weightID, self.apriori) 1124 if not rule: 1125 break 1126 if not progress: 1127 print "rule learned: ", Orange.classification.rules.ruleToString(rule), rule.quality 1128 dich_data = self.remove_covered_examples(rule, dich_data, weightID) 1129 if progress: 1130 progress(self.remaining_probability(dich_data),None) 1131 rules.append(rule) 1132 if self.learn_one_rule: 1133 break 1134 1135 for r in arg_rules: 1136 dich_data = self.remove_covered_examples(r, dich_data, weightID) 1137 rules.append(r) 1138 1139 # prune unnecessary rules 1140 rules = self.prune_unnecessary_rules(rules, dich_data, weightID) 1141 1142 if self.add_sub_rules: 1143 rules = self.add_sub_rules_call(rules, dich_data, weightID) 1144 1145 # restore domain and class in rules, add them to all_rules 1146 for r in rules: 1147 all_rules.append(self.change_domain(r, cl, examples, weightID)) 1148 1149 if progress: 1150 progress(1.0,None) 1151 # create a classifier from all rules 1152 return self.create_classifier(all_rules, examples, weightID) 1153 1154 def learn_argumented_rule(self, ae, examples, weightID): 1155 # prepare roots of rules from arguments 1156 positive_args = self.init_pos_args(ae, examples, weightID) 1157 if not positive_args: # something wrong 1158 raise "There is a problem with argumented example %s"%str(ae) 1159 return None 1160 negative_args = self.init_neg_args(ae, examples, weightID) 1161 1162 # set negative arguments in refiner 1163 self.ruleFinder.refiner.notAllowedSelectors = negative_args 1164 
self.ruleFinder.refiner.example = ae 1165 # set arguments to filter 1166 self.ruleFinder.ruleFilter.setArguments(examples.domain,positive_args) 1167 1168 # learn a rule 1169 self.ruleFinder.evaluator.bestRule = None 1170 self.ruleFinder.evaluator.returnBestFuture = True 1171 self.ruleFinder(examples,weightID,0,positive_args) 1172 ## self.ruleFinder.evaluator.bestRule.quality = 0.8 1173 1174 # return best rule 1175 return self.ruleFinder.evaluator.bestRule 1176 1177 def prepare_settings(self, examples, weightID, cl_i, progress): 1178 # apriori distribution 1179 self.apriori = Orange.core.Distribution(examples.domain.classVar,examples,weightID) 1180 1181 # prepare covering mechanism 1182 self.coverAndRemove = CovererAndRemover_Prob(examples, weightID, 0, self.apriori) 1183 self.ruleFinder.evaluator.probVar = examples.domain.getmeta(self.coverAndRemove.probAttribute) 1184 1185 # compute extreme distributions 1186 # TODO: why evd and evd_this???? 1187 if self.ruleFinder.evaluator.optimismReduction > 0 and not self.evd: 1188 self.evd_this = self.evd_creator.computeEVD(examples, weightID, target_class=0, progress = progress) 1189 if self.evd: 1190 self.evd_this = self.evd[cl_i] 1191 1192 def turn_ABML_mode(self, examples, weightID, cl_i): 1193 # evaluator 1194 if self.ruleFinder.evaluator.optimismReduction > 0 and self.argumentID: 1195 if self.evd_arguments: 1196 self.ruleFinder.evaluator.evDistGetter.dists = self.evd_arguments[cl_i] 1197 else: 1198 self.ruleFinder.evaluator.evDistGetter.dists = self.evd_this # self.evd_creator.computeEVD_example(examples, weightID, target_class=0) 1199 # rule refiner 1200 self.ruleFinder.refiner = self.refiner_arguments 1201 self.ruleFinder.refiner.argumentID = self.argumentID 1202 self.ruleFinder.ruleFilter = self.ruleFilter_arguments 1203 1204 def create_dich_class(self, examples, cl): 1205 """ create dichotomous class. 
""" 1206 (newDomain, targetVal) = createDichotomousClass(examples.domain, examples.domain.classVar, str(cl), negate=0) 1207 newDomainmetas = newDomain.getmetas() 1208 newDomain.addmeta(Orange.core.newmetaid(), examples.domain.classVar) # old class as meta 1209 dichData = examples.select(newDomain) 1210 if self.argumentID: 1211 for d in dichData: # remove arguments given to other classes 1212 if not d.getclass() == targetVal: 1213 d[self.argumentID] = "?" 1214 return dichData 1215 1216 def get_argumented_examples(self, examples): 1217 if not self.argumentID: 1218 return None 1219 1220 # get argumentated examples 1221 return ArgumentFilter_hasSpecial()(examples, self.argumentID, targetClass = 0) 1222 1223 def sort_arguments(self, arg_examples, examples): 1224 if not self.argumentID: 1225 return None 1226 evaluateAndSortArguments(examples, self.argumentID) 1227 if len(arg_examples)>0: 1228 # sort examples by their arguments quality (using first argument as it has already been sorted) 1229 sorted = arg_examples.native() 1230 sorted.sort(lambda x,y: cmp(x[self.argumentID].value.positiveArguments[0].quality, 1231 y[self.argumentID].value.positiveArguments[0].quality)) 1232 return Orange.core.ExampleTable(examples.domain, sorted) 1233 else: 1234 return None 1235 1236 def turn_normal_mode(self, examples, weightID, cl_i): 1237 # evaluator 1238 if self.ruleFinder.evaluator.optimismReduction > 0: 1239 if self.evd: 1240 self.ruleFinder.evaluator.evDistGetter.dists = self.evd[cl_i] 1241 else: 1242 self.ruleFinder.evaluator.evDistGetter.dists = self.evd_this # self.evd_creator.computeEVD(examples, weightID, target_class=0) 1243 # rule refiner 1244 self.ruleFinder.refiner = self.refiner 1245 self.ruleFinder.ruleFilter = self.ruleFilter 1246 1247 def learn_normal_rule(self, examples, weightID, apriori): 1248 if hasattr(self.ruleFinder.evaluator, "bestRule"): 1249 self.ruleFinder.evaluator.bestRule = None 1250 rule = self.ruleFinder(examples,weightID,0,Orange.core.RuleList()) 1251 
if hasattr(self.ruleFinder.evaluator, "bestRule") and self.ruleFinder.evaluator.returnExpectedProb: 1252 rule = self.ruleFinder.evaluator.bestRule 1253 self.ruleFinder.evaluator.bestRule = None 1254 if self.postpruning: 1255 rule = self.postpruning(rule,examples,weightID,0, aprior) 1256 return rule 1257 1258 def remove_covered_examples(self, rule, examples, weightID): 1259 nexamples, nweight = self.coverAndRemove(rule,examples,weightID,0) 1260 return nexamples 1261 1262 1263 def prune_unnecessary_rules(self, rules, examples, weightID): 1264 return self.coverAndRemove.getBestRules(rules,examples,weightID) 1265 1266 def change_domain(self, rule, cl, examples, weightID): 1267 rule.examples = rule.examples.select(examples.domain) 1268 rule.classDistribution = Orange.core.Distribution(rule.examples.domain.classVar,rule.examples,weightID) # adapt distribution 1269 rule.classifier = Orange.core.DefaultClassifier(cl) # adapt classifier 1270 rule.filter = Orange.core.Filter_values(domain = examples.domain, 1271 conditions = rule.filter.conditions) 1272 if hasattr(rule, "learner") and hasattr(rule.learner, "arg_example"): 1273 rule.learner.arg_example = Orange.core.Example(examples.domain, rule.learner.arg_example) 1274 return rule 1275 1276 def create_classifier(self, rules, examples, weightID): 1277 return self.classifier(rules, examples, weightID) 1278 1279 def add_sub_rules_call(self, rules, examples, weightID): 1280 apriori = Orange.core.Distribution(examples.domain.classVar,examples,weightID) 1281 newRules = Orange.core.RuleList() 1282 for r in rules: 1283 newRules.append(r) 1284 1285 # loop through rules 1286 for r in rules: 1287 tmpList = Orange.core.RuleList() 1288 tmpRle = r.clone() 1289 tmpRle.filter.conditions = r.filter.conditions[:r.requiredConditions] # do not split argument 1290 tmpRle.parentRule = None 1291 tmpRle.filterAndStore(examples,weightID,r.classifier.defaultVal) 1292 tmpRle.complexity = 0 1293 tmpList.append(tmpRle) 1294 while tmpList and 
len(tmpList[0].filter.conditions) <= len(r.filter.conditions): 1295 tmpList2 = Orange.core.RuleList() 1296 for tmpRule in tmpList: 1297 # evaluate tmpRule 1298 oldREP = self.ruleFinder.evaluator.returnExpectedProb 1299 self.ruleFinder.evaluator.returnExpectedProb = False 1300 tmpRule.quality = self.ruleFinder.evaluator(tmpRule,examples,weightID,r.classifier.defaultVal,apriori) 1301 self.ruleFinder.evaluator.returnExpectedProb = oldREP 1302 # if rule not in rules already, add it to the list 1303 if not True in [Orange.classification.rules.rules_equal(ri,tmpRule) for ri in newRules] and len(tmpRule.filter.conditions)>0 and tmpRule.quality > apriori[r.classifier.defaultVal]/apriori.abs: 1304 newRules.append(tmpRule) 1305 # create new tmpRules, set parent Rule, append them to tmpList2 1306 if not True in [Orange.classification.rules.rules_equal(ri,tmpRule) for ri in newRules]: 1307 for c in r.filter.conditions: 1308 tmpRule2 = tmpRule.clone() 1309 tmpRule2.parentRule = tmpRule 1310 tmpRule2.filter.conditions.append(c) 1311 tmpRule2.filterAndStore(examples,weightID,r.classifier.defaultVal) 1312 tmpRule2.complexity += 1 1313 if tmpRule2.classDistribution.abs < tmpRule.classDistribution.abs: 1314 tmpList2.append(tmpRule2) 1315 tmpList = tmpList2 1316 return newRules 1317 1318 1319 def init_pos_args(self, ae, examples, weightID): 1320 pos_args = Orange.core.RuleList() 1321 # prepare arguments 1322 for p in ae[self.argumentID].value.positiveArguments: 1323 new_arg = Orange.core.Rule(filter=ArgFilter(argumentID = self.argumentID, 1324 filter = self.newFilter_values(p.filter)), 1325 complexity = 0) 1326 new_arg.valuesFilter = new_arg.filter.filter 1327 pos_args.append(new_arg) 1328 1329 1330 if hasattr(self.ruleFinder.evaluator, "returnExpectedProb"): 1331 old_exp = self.ruleFinder.evaluator.returnExpectedProb 1332 self.ruleFinder.evaluator.returnExpectedProb = False 1333 1334 # argument pruning (all or just unfinished arguments) 1335 # if pruning is chosen, then prune 
arguments if possible 1336 for p in pos_args: 1337 p.filterAndStore(examples, weightID, 0) 1338 # pruning on: we check on all conditions and take only best 1339 if self.prune_arguments: 1340 allowed_conditions = [c for c in p.filter.conditions] 1341 pruned_conditions = self.prune_arg_conditions(ae, allowed_conditions, examples, weightID) 1342 p.filter.conditions = pruned_conditions 1343 else: # prune only unspecified conditions 1344 spec_conditions = [c for c in p.filter.conditions if not c.unspecialized_condition] 1345 unspec_conditions = [c for c in p.filter.conditions if c.unspecialized_condition] 1346 # let rule cover now all examples filtered by specified conditions 1347 p.filter.conditions = spec_conditions 1348 p.filterAndStore(examples, weightID, 0) 1349 pruned_conditions = self.prune_arg_conditions(ae, unspec_conditions, p.examples, p.weightID) 1350 p.filter.conditions.extend(pruned_conditions) 1351 p.filter.filter.conditions.extend(pruned_conditions) 1352 # if argument does not contain all unspecialized reasons, add those reasons with minimum values 1353 at_oper_pairs = [(c.position, c.oper) for c in p.filter.conditions if type(c) == Orange.core.ValueFilter_continuous] 1354 for u in unspec_conditions: 1355 if not (u.position, u.oper) in at_oper_pairs: 1356 # find minimum value 1357 u.ref = min([float(e[u.position])10. 
for e in p.examples]) 1358 p.filter.conditions.append(u) 1359 p.filter.filter.conditions.append(u) 1360 1361 1362 # set parameters to arguments 1363 for p_i,p in enumerate(pos_args): 1364 p.filterAndStore(examples,weightID,0) 1365 p.filter.domain = examples.domain 1366 if not p.learner: 1367 p.learner = DefaultLearner(defaultValue=ae.getclass()) 1368 p.classifier = p.learner(p.examples, p.weightID) 1369 p.baseDist = p.classDistribution 1370 p.requiredConditions = len(p.filter.conditions) 1371 p.learner.setattr("arg_length", len(p.filter.conditions)) 1372 p.learner.setattr("arg_example", ae) 1373 p.complexity = len(p.filter.conditions) 1374 1375 if hasattr(self.ruleFinder.evaluator, "returnExpectedProb"): 1376 self.ruleFinder.evaluator.returnExpectedProb = old_exp 1377 1378 return pos_args 1379 1380 def newFilter_values(self, filter): 1381 newFilter = Orange.core.Filter_values() 1382 newFilter.conditions = filter.conditions[:] 1383 newFilter.domain = filter.domain 1384 newFilter.negate = filter.negate 1385 newFilter.conjunction = filter.conjunction 1386 return newFilter 1387 1388 def init_neg_args(self, ae, examples, weightID): 1389 return ae[self.argumentID].value.negativeArguments 1390 1391 def remaining_probability(self, examples): 1392 return self.coverAndRemove.covered_percentage(examples) 1393 1394 def prune_arg_conditions(self, crit_example, allowed_conditions, examples, weightID): 1395 if not allowed_conditions: 1396 return [] 1397 cn2_learner = Orange.classification.rules.CN2UnorderedLearner() 1398 cn2_learner.ruleFinder = Orange.core.RuleBeamFinder() 1399 cn2_learner.ruleFinder.refiner = SelectorArgConditions(crit_example, allowed_conditions) 1400 cn2_learner.ruleFinder.evaluator = Orange.classification.rules.MEstimate(self.ruleFinder.evaluator.m) 1401 rule = cn2_learner.ruleFinder(examples,weightID,0,Orange.core.RuleList()) 1402 return rule.filter.conditions 962 1403 963 1404 … … 1432 1873 return newRules 1433 1874 1875 1876 1877 1878 1879 1880 1881 1882 
1883 ################################################################################ 1884 ################################################################################ 1885 ## This has been copyed&pasted from orngABCN2.py and not yet appropriately ## 1886 ## refactored and documented. ## 1887 ################################################################################ 1888 ################################################################################ 1889 1890 1891 """ This module implements argument based rule learning. 1892 The main learner class is ABCN2. The first few classes are some variants of ABCN2 with reasonable settings. """ 1893 1894 1895 import operator 1896 import random 1897 import numpy 1898 import math 1899 1900 from orngABML import * 1901 1902 # Default learner  returns # 1903 # default classifier with pre # 1904 # defined output class # 1905 class DefaultLearner(Orange.core.Learner): 1906 def __init__(self,defaultValue = None): 1907 self.defaultValue = defaultValue 1908 def __call__(self,examples,weightID=0): 1909 return Orange.core.DefaultClassifier(self.defaultValue,defaultDistribution = Orange.core.Distribution(examples.domain.classVar,examples,weightID)) 1910 1911 class ABCN2Ordered(ABCN2): 1912 """ Rules learned by ABCN2 are ordered and used as a decision list. """ 1913 def __init__(self, argumentID=0, **kwds): 1914 ABCN2.__init__(self, argumentID=argumentID, **kwds) 1915 self.classifier.set_prefix_rules = True 1916 self.classifier.optimize_betas = False 1917 1918 class ABCN2M(ABCN2): 1919 """ Argument based rule learning with mestimate as evaluation function. 
""" 1920 def __init__(self, argumentID=0, **kwds): 1921 ABCN2.__init__(self, argumentID=argumentID, **kwds) 1922 self.opt_reduction = 0 1923 1924 1925 # *********************** # 1926 # Argument based covering # 1927 # *********************** # 1928 1929 class ABBeamFilter(Orange.core.RuleBeamFilter): 1930 """ ABBeamFilter: Filters beam; 1931  leaves first N rules (by quality) 1932  leaves first N rules that have only of arguments in condition part 1933 """ 1934 def __init__(self,width=5): 1935 self.width=width 1936 self.pArgs=None 1937 1938 def __call__(self,rulesStar,examples,weightID): 1939 newStar=Orange.core.RuleList() 1940 rulesStar.sort(lambda x,y: cmp(x.quality,y.quality)) 1941 argsNum=0 1942 for r_i,r in enumerate(rulesStar): 1943 if r_i<self.width: # either is one of best "width" rules 1944 newStar.append(r) 1945 elif self.onlyPositives(r): 1946 if argsNum<self.width: 1947 newStar.append(r) 1948 argsNum+=1 1949 return newStar 1950 1951 def setArguments(self,domain,positiveArguments): 1952 self.pArgs = positiveArguments 1953 self.domain = domain 1954 self.argTab = [0]*len(self.domain.attributes) 1955 for arg in self.pArgs: 1956 for cond in arg.filter.conditions: 1957 self.argTab[cond.position]=1 1958 1959 def onlyPositives(self,rule): 1960 if not self.pArgs: 1961 return False 1962 1963 ruleTab=[0]*len(self.domain.attributes) 1964 for cond in rule.filter.conditions: 1965 ruleTab[cond.position]=1 1966 return map(operator.or_,ruleTab,self.argTab)==self.argTab 1967 1968 1969 class ruleCoversArguments: 1970 """ Class determines if rule covers one out of a set of arguments. 
""" 1971 def __init__(self, arguments): 1972 self.arguments = arguments 1973 self.indices = [] 1974 for a in self.arguments: 1975 indNA = getattr(a.filter,"indices",None) 1976 if not indNA: 1977 a.filter.setattr("indices", ruleCoversArguments.filterIndices(a.filter)) 1978 self.indices.append(a.filter.indices) 1979 1980 def __call__(self, rule): 1981 if not self.indices: 1982 return False 1983 if not getattr(rule.filter,"indices",None): 1984 rule.filter.indices = ruleCoversArguments.filterIndices(rule.filter) 1985 for index in self.indices: 1986 if map(operator.or_,rule.filter.indices,index) == rule.filter.indices: 1987 return True 1988 return False 1989 1990 def filterIndices(filter): 1991 if not filter.domain: 1992 return [] 1993 ind = [0]*len(filter.domain.attributes) 1994 for c in filter.conditions: 1995 ind[c.position]=operator.or_(ind[c.position], 1996 ruleCoversArguments.conditionIndex(c)) 1997 return ind 1998 filterIndices = staticmethod(filterIndices) 1999 2000 def conditionIndex(c): 2001 if type(c) == Orange.core.ValueFilter_continuous: 2002 if (c.oper == Orange.core.ValueFilter_continuous.GreaterEqual or 2003 c.oper == Orange.core.ValueFilter_continuous.Greater): 2004 return 5# 0101 2005 elif (c.oper == Orange.core.ValueFilter_continuous.LessEqual or 2006 c.oper == Orange.core.ValueFilter_continuous.Less): 2007 return 3 # 0011 2008 else: 2009 return c.oper 2010 else: 2011 return 1 # 0001 2012 conditionIndex = staticmethod(conditionIndex) 2013 2014 def oneSelectorToCover(ruleIndices, argIndices): 2015 at, type = 1, 0 2016 for r_i, ind in enumerate(ruleIndices): 2017 if not argIndices[r_i]: 2018 continue 2019 if at>1 and not ind == argIndices[r_i]: # need two changes 2020 return (1,0) 2021 if not ind == argIndices[r_i]: 2022 if argIndices[r_i] in [1,3,5]: 2023 at,type=r_i,argIndices[r_i] 2024 if argIndices[r_i]==6: 2025 if ind==3: 2026 at,type=r_i,5 2027 if ind==5: 2028 at,type=r_i,3 2029 return at,type 2030 oneSelectorToCover = 
staticmethod(oneSelectorToCover) 2031 2032 class SelectorAdder(Orange.core.RuleBeamRefiner): 2033 """ Selector adder, this function is a refiner function: 2034  refined rules are not consistent with any of negative arguments. """ 2035 def __init__(self, example=None, notAllowedSelectors=[], argumentID = None, 2036 discretizer = Orange.core.EntropyDiscretization(forceAttribute=True)): 2037 # required values  needed values of attributes 2038 self.example = example 2039 self.argumentID = argumentID 2040 self.notAllowedSelectors = notAllowedSelectors 2041 self.discretizer = discretizer 2042 2043 def __call__(self, oldRule, data, weightID, targetClass=1): 2044 inNotAllowedSelectors = ruleCoversArguments(self.notAllowedSelectors) 2045 newRules = Orange.core.RuleList() 2046 2047 # get positive indices (selectors already in the rule) 2048 indices = getattr(oldRule.filter,"indices",None) 2049 if not indices: 2050 indices = ruleCoversArguments.filterIndices(oldRule.filter) 2051 oldRule.filter.setattr("indices",indices) 2052 2053 # get negative indices (selectors that should not be in the rule) 2054 negativeIndices = [0]*len(data.domain.attributes) 2055 for nA in self.notAllowedSelectors: 2056 #print indices, nA.filter.indices 2057 at_i,type_na = ruleCoversArguments.oneSelectorToCover(indices, nA.filter.indices) 2058 if at_i>1: 2059 negativeIndices[at_i] = operator.or_(negativeIndices[at_i],type_na) 2060 2061 #iterate through indices = attributes 2062 for i,ind in enumerate(indices): 2063 if not self.example[i] or self.example[i].isSpecial(): 2064 continue 2065 if ind == 1: 2066 continue 2067 if data.domain[i].varType == Orange.core.VarTypes.Discrete and not negativeIndices[i]==1: # DISCRETE attribute 2068 if self.example: 2069 values = [self.example[i]] 2070 else: 2071 values = data.domain[i].values 2072 for v in values: 2073 tempRule = oldRule.clone() 2074 tempRule.filter.conditions.append(Orange.core.ValueFilter_discrete(position = i, 2075 values = 
[Orange.core.Value(data.domain[i],v)], 2076 acceptSpecial=0)) 2077 tempRule.complexity += 1 2078 tempRule.filter.indices[i] = 1 # 1 stands for discrete attribute (see ruleCoversArguments.conditionIndex) 2079 tempRule.filterAndStore(oldRule.examples, oldRule.weightID, targetClass) 2080 if len(tempRule.examples)<len(oldRule.examples): 2081 newRules.append(tempRule) 2082 elif data.domain[i].varType == Orange.core.VarTypes.Continuous and not negativeIndices[i]==7: # CONTINUOUS attribute 2083 try: 2084 at = data.domain[i] 2085 at_d = self.discretizer(at,oldRule.examples) 2086 except: 2087 continue # discretization failed ! 2088 # If discretization makes sense? then: 2089 if len(at_d.values)>1: 2090 for p in at_d.getValueFrom.transformer.points: 2091 #LESS 2092 if not negativeIndices[i]==3: 2093 tempRule = self.getTempRule(oldRule,i,Orange.core.ValueFilter_continuous.LessEqual,p,targetClass,3) 2094 if len(tempRule.examples)<len(oldRule.examples) and self.example[i]<=p:# and not inNotAllowedSelectors(tempRule): 2095 newRules.append(tempRule) 2096 #GREATER 2097 if not negativeIndices[i]==5: 2098 tempRule = self.getTempRule(oldRule,i,Orange.core.ValueFilter_continuous.Greater,p,targetClass,5) 2099 if len(tempRule.examples)<len(oldRule.examples) and self.example[i]>p:# and not inNotAllowedSelectors(tempRule): 2100 newRules.append(tempRule) 2101 for r in newRules: 2102 r.parentRule = oldRule 2103 r.valuesFilter = r.filter.filter 2104 return newRules 2105 2106 def getTempRule(self,oldRule,pos,oper,ref,targetClass,atIndex): 2107 tempRule = oldRule.clone() 2108 2109 tempRule.filter.conditions.append(Orange.core.ValueFilter_continuous(position=pos, 2110 oper=oper, 2111 ref=ref, 2112 acceptSpecial=0)) 2113 tempRule.complexity += 1 2114 tempRule.filter.indices[pos] = operator.or_(tempRule.filter.indices[pos],atIndex) # from ruleCoversArguments.conditionIndex 2115 tempRule.filterAndStore(oldRule.examples,tempRule.weightID,targetClass) 2116 return tempRule 2117 2118 def 
setCondition(self, oldRule, targetClass, ci, condition): 2119 tempRule = oldRule.clone() 2120 tempRule.filter.conditions[ci] = condition 2121 tempRule.filter.conditions[ci].setattr("specialized",1) 2122 tempRule.filterAndStore(oldRule.examples,oldRule.weightID,targetClass) 2123 return tempRule 2124 2125 2126 # This filter is the ugliest code ever! Problem is with Orange, I had some problems with inheriting deepCopy 2127 # I should take another look at it. 2128 class ArgFilter(Orange.core.Filter): 2129 """ This class implements ABcovering principle. """ 2130 def __init__(self, argumentID=None, filter = Orange.core.Filter_values()): 2131 self.filter = filter 2132 self.indices = getattr(filter,"indices",[]) 2133 if not self.indices and len(filter.conditions)>0: 2134 self.indices = ruleCoversArguments.filterIndices(filter) 2135 self.argumentID = argumentID 2136 self.debug = 0 2137 self.domain = self.filter.domain 2138 self.conditions = filter.conditions 2139 2140 def condIn(self,cond): # is condition in the filter? 
2141 condInd = ruleCoversArguments.conditionIndex(cond) 2142 if operator.or_(condInd,self.indices[cond.position]) == self.indices[cond.position]: 2143 return True 2144 return False 2145 2146 def __call__(self,example): 2147 ## print "in", self.filter(example), self.filter.conditions[0](example) 2148 ## print self.filter.conditions[1].values 2149 if self.filter(example): 2150 try: 2151 if example[self.argumentID].value and len(example[self.argumentID].value.positiveArguments)>0: # example has positive arguments 2152 # conditions should cover at least one of the positive arguments 2153 oneArgCovered = False 2154 for pA in example[self.argumentID].value.positiveArguments: 2155 argCovered = [self.condIn(c) for c in pA.filter.conditions] 2156 oneArgCovered = oneArgCovered or len(argCovered) == sum(argCovered) #argCovered 2157 if oneArgCovered: 2158 break 2159 if not oneArgCovered: 2160 return False 2161 if example[self.argumentID].value and len(example[self.argumentID].value.negativeArguments)>0: # example has negative arguments 2162 # condition should not cover neither of negative arguments 2163 for pN in example[self.argumentID].value.negativeArguments: 2164 argCovered = [self.condIn(c) for c in pN.filter.conditions] 2165 if len(argCovered)==sum(argCovered): 2166 return False 2167 except: 2168 return True 2169 return True 2170 else: 2171 return False 2172 2173 def __setattr__(self,name,obj): 2174 self.__dict__[name]=obj 2175 self.filter.setattr(name,obj) 2176 2177 def deepCopy(self): 2178 newFilter = ArgFilter(argumentID=self.argumentID) 2179 newFilter.filter = Orange.core.Filter_values() #self.filter.deepCopy() 2180 newFilter.filter.conditions = self.filter.conditions[:] 2181 newFilter.domain = self.filter.domain 2182 newFilter.negate = self.filter.negate 2183 newFilter.conjunction = self.filter.conjunction 2184 newFilter.domain = self.filter.domain 2185 newFilter.conditions = newFilter.filter.conditions 2186 newFilter.indices = self.indices[:] 2187 if 
getattr(self,"candidateValues",None): 2188 newFilter.candidateValues = self.candidateValues[:] 2189 return newFilter 2190 2191 2192 class SelectorArgConditions(Orange.core.RuleBeamRefiner): 2193 """ Selector adder, this function is a refiner function: 2194  refined rules are not consistent with any of negative arguments. """ 2195 def __init__(self, example, allowed_selectors): 2196 # required values  needed values of attributes 2197 self.example = example 2198 self.allowed_selectors = allowed_selectors 2199 2200 def __call__(self, oldRule, data, weightID, targetClass=1): 2201 if len(oldRule.filter.conditions) >= len(self.allowed_selectors): 2202 return Orange.core.RuleList() 2203 newRules = Orange.core.RuleList() 2204 for c in self.allowed_selectors: 2205 # normal condition 2206 if not c.unspecialized_condition: 2207 tempRule = oldRule.clone() 2208 tempRule.filter.conditions.append(c) 2209 tempRule.filterAndStore(oldRule.examples, oldRule.weightID, targetClass) 2210 if len(tempRule.examples)<len(oldRule.examples): 2211 newRules.append(tempRule) 2212 # unspecified condition 2213 else: 2214 # find all possible example values 2215 vals = {} 2216 for e in oldRule.examples: 2217 if not e[c.position].isSpecial(): 2218 vals[str(e[c.position])] = 1 2219 values = vals.keys() 2220 # for each value make a condition 2221 for v in values: 2222 tempRule = oldRule.clone() 2223 tempRule.filter.conditions.append(Orange.core.ValueFilter_continuous(position=c.position, 2224 oper=c.oper, 2225 ref=float(v), 2226 acceptSpecial=0)) 2227 if tempRule(self.example): 2228 tempRule.filterAndStore(oldRule.examples, oldRule.weightID, targetClass) 2229 if len(tempRule.examples)<len(oldRule.examples): 2230 newRules.append(tempRule) 2231 ## print " NEW RULES " 2232 ## for r in newRules: 2233 ## print Orange.classification.rules.ruleToString(r) 2234 for r in newRules: 2235 r.parentRule = oldRule 2236 ## print Orange.classification.rules.ruleToString(r) 2237 return newRules 2238 2239 2240 # 
# ********************** #
# Probabilistic covering #
# ********************** #

class CovererAndRemover_Prob(Orange.core.RuleCovererAndRemover):
    """ Implements probabilistic covering: instead of removing covered
    examples, each example keeps (in a meta attribute) the quality of the
    best rule covering it so far. """

    def __init__(self, examples, weightID, targetClass, apriori):
        self.bestRule = [None] * len(examples)  # best covering rule per example
        self.probAttribute = Orange.core.newmetaid()
        self.aprioriProb = apriori[targetClass] / apriori.abs
        # every example starts at the apriori probability of the target class
        examples.addMetaAttribute(self.probAttribute, self.aprioriProb)
        examples.domain.addmeta(self.probAttribute, Orange.core.FloatVariable("Probs"))

    def getBestRules(self, currentRules, examples, weightID):
        """Collect the distinct best rules whose predicted class matches the
        class of the example they are best for."""
        bestRules = Orange.core.RuleList()
        for r_i, r in enumerate(self.bestRule):
            if r and not Orange.classification.rules.rule_in_set(r, bestRules) and int(examples[r_i].getclass()) == int(r.classifier.defaultValue):
                bestRules.append(r)
        return bestRules

    def __call__(self, rule, examples, weights, targetClass):
        if hasattr(rule, "learner") and hasattr(rule.learner, "arg_example"):
            example = rule.learner.arg_example
        else:
            example = None
        for ei, e in enumerate(examples):
            # an argumented rule may displace a non-argumented best rule even
            # when its quality is marginally (0.01) lower
            if example and not (hasattr(self.bestRule[ei], "learner") and hasattr(self.bestRule[ei].learner, "arg_example")):
                can_be_worst = True
            else:
                can_be_worst = False
            # NOTE(review): the '-' in (prob - 0.01) was lost in extraction;
            # reconstructed to match the tolerance described above
            if can_be_worst and rule(e) and rule.quality > (e[self.probAttribute] - 0.01):
                e[self.probAttribute] = rule.quality + 0.001  # 0.001 avoids numerical errors
                self.bestRule[ei] = rule
            elif rule(e) and rule.quality > e[self.probAttribute]:
                e[self.probAttribute] = rule.quality + 0.001  # 0.001 avoids numerical errors
                self.bestRule[ei] = rule
        return (examples, weights)

    def covered_percentage(self, examples):
        """Average relative improvement of example probabilities over the
        apriori probability."""
        p = 0.0
        for ei, e in enumerate(examples):
            p += (e[self.probAttribute] - self.aprioriProb) / (1.0 - self.aprioriProb)
        return p / len(examples)


# **************************************** #
# Estimation of extreme value distribution #
# **************************************** #

# Miscellaneous - utility functions
def avg(l):
    """Arithmetic mean of a list; 0. for an empty list."""
    return sum(l) / len(l) if l else 0.

def var(l):
    """Sample variance (n-1 denominator); 0. for fewer than two items."""
    if len(l) < 2:
        return 0.
    av = avg(l)
    return sum([math.pow(li - av, 2) for li in l]) / (len(l) - 1)

def perc(l, p):
    """p-th percentile of l (sorts l in place)."""
    l.sort()
    return l[int(math.floor(p * len(l)))]

class EVDFitter:
    """ Randomizes a dataset and fits an extreme value distribution onto it. """

    def __init__(self, learner, n=200, randomseed=100):
        self.learner = learner
        self.n = n                    # number of sampling repetitions
        self.randomseed = randomseed  # fixed seed -> repeatable experiments

    def createRandomDataSet(self, data):
        """Return a copy of `data` with the class column randomly shuffled."""
        newData = Orange.core.ExampleTable(data)
        # shuffle data
        cl_num = newData.toNumpy("C")
        random.shuffle(cl_num[0][:, 0])
        clData = Orange.core.ExampleTable(Orange.core.Domain([newData.domain.classVar]), cl_num[0])
        for d_i, d in enumerate(newData):
            d[newData.domain.classVar] = clData[d_i][newData.domain.classVar]
        return newData

    def createEVDistList(self, evdList):
        """Wrap (mu, beta, percentiles) triples into an EVDistList."""
        l = Orange.core.EVDistList()
        for el in evdList:
            l.append(Orange.core.EVDist(mu=el[0], beta=el[1], percentiles=el[2]))
        return l

    # estimated Fisher-Tippett parameters for a set of values in `vals`
    # (plus decile averages)
    def compParameters(self, vals, oldMi=0.5, oldBeta=1.1):
        # compute decile averages
        vals.sort()
        N = len(vals)
        percs = [avg(vals[int(float(N) * i / 10):int(float(N) * (i + 1) / 10)]) for i in range(10)]
        if N < 10:
            return oldMi, oldBeta, percs
        # beta from the Gumbel variance relation var = beta^2 * pi^2 / 6,
        # clamped to [oldBeta, 2.0]
        beta = min(2.0, max(oldBeta, math.sqrt(6 * var(vals) / math.pow(math.pi, 2))))
        # NOTE(review): '-' signs lost in extraction; reconstructed as the
        # Gumbel location estimate from the top decile -- confirm upstream
        mi = max(oldMi, percs[-1] + beta * math.log(-math.log(0.95)))
        return mi, beta, percs
math.sqrt(6*var(vals)/math.pow(math.pi,2)))) 2343 mi = max(oldMi,percs[1]+beta*math.log(math.log(0.95))) 2344 return mi, beta, percs 2345 2346 def prepare_learner(self): 2347 self.oldStopper = self.learner.ruleFinder.ruleStoppingValidator 2348 self.evaluator = self.learner.ruleFinder.evaluator 2349 self.refiner = self.learner.ruleFinder.refiner 2350 self.validator = self.learner.ruleFinder.validator 2351 self.ruleFilter = self.learner.ruleFinder.ruleFilter 2352 self.learner.ruleFinder.validator = None 2353 self.learner.ruleFinder.evaluator = Orange.core.RuleEvaluator_LRS() 2354 self.learner.ruleFinder.evaluator.storeRules = True 2355 self.learner.ruleFinder.ruleStoppingValidator = Orange.core.RuleValidator_LRS(alpha=1.0) 2356 self.learner.ruleFinder.ruleStoppingValidator.max_rule_complexity = 0 2357 self.learner.ruleFinder.refiner = Orange.core.RuleBeamRefiner_Selector() 2358 self.learner.ruleFinder.ruleFilter = Orange.core.RuleBeamFilter_Width(width = 1) 2359 2360 2361 def restore_learner(self): 2362 self.learner.ruleFinder.evaluator = self.evaluator 2363 self.learner.ruleFinder.ruleStoppingValidator = self.oldStopper 2364 self.learner.ruleFinder.refiner = self.refiner 2365 self.learner.ruleFinder.validator = self.validator 2366 self.learner.ruleFinder.ruleFilter = self.ruleFilter 2367 2368 def computeEVD(self, data, weightID=0, target_class=0, progress=None): 2369 # initialize random seed to make experiments repeatable 2370 random.seed(self.randomseed) 2371 2372 # prepare learned for distribution computation 2373 self.prepare_learner() 2374 2375 # loop through N (sampling repetitions) 2376 extremeDists=[(0, 1, [])] 2377 self.learner.ruleFinder.ruleStoppingValidator.max_rule_complexity = self.oldStopper.max_rule_complexity 2378 maxVals = [[] for l in range(self.oldStopper.max_rule_complexity)] 2379 for d_i in range(self.n): 2380 if not progress: 2381 print d_i, 2382 else: 2383 progress(float(d_i)/self.n, None) 2384 # create data set (remove and randomize) 2385 
tempData = self.createRandomDataSet(data) 2386 self.learner.ruleFinder.evaluator.rules = Orange.core.RuleList() 2387 # Next, learn a rule 2388 self.learner.ruleFinder(tempData,weightID,target_class, Orange.core.RuleList()) 2389 for l in range(self.oldStopper.max_rule_complexity): 2390 qs = [r.quality for r in self.learner.ruleFinder.evaluator.rules if r.complexity == l+1] 2391 if qs: 2392 maxVals[l].append(max(qs)) 2393 else: 2394 maxVals[l].append(0) 2395 2396 mu, beta = 1.0, 1.0 2397 for mi,m in enumerate(maxVals): 2398 mu, beta, perc = self.compParameters(m,mu,beta) 2399 extremeDists.append((mu, beta, perc)) 2400 extremeDists.extend([(0,1,[])]*(mi)) 2401 2402 self.restore_learner() 2403 return self.createEVDistList(extremeDists) 2404 2405 # ************************* # 2406 # Rule based classification # 2407 # ************************* # 2408 2409 class CrossValidation: 2410 def __init__(self, folds=5, randomGenerator = 150): 2411 self.folds = folds 2412 self.randomGenerator = randomGenerator 2413 2414 def __call__(self, learner, examples, weight): 2415 res = orngTest.crossValidation([learner], (examples, weight), folds = self.folds, randomGenerator = self.randomGenerator) 2416 return self.get_prob_from_res(res, examples) 2417 2418 def get_prob_from_res(self, res, examples): 2419 probDist = Orange.core.DistributionList() 2420 for tex in res.results: 2421 d = Orange.core.Distribution(examples.domain.classVar) 2422 for di in range(len(d)): 2423 d[di] = tex.probabilities[0][di] 2424 probDist.append(d) 2425 return probDist 2426 2427 class PILAR: 2428 """ PILAR (Probabilistic improvement of learning algorithms with rules) """ 2429 def __init__(self, alternative_learner = None, min_cl_sig = 0.5, min_beta = 0.0, set_prefix_rules = False, optimize_betas = True): 2430 self.alternative_learner = alternative_learner 2431 self.min_cl_sig = min_cl_sig 2432 self.min_beta = min_beta 2433 self.set_prefix_rules = set_prefix_rules 2434 self.optimize_betas = optimize_betas 2435 
self.selected_evaluation = CrossValidation(folds=5) 2436 2437 def __call__(self, rules, examples, weight=0): 2438 rules = self.add_null_rule(rules, examples, weight) 2439 if self.alternative_learner: 2440 probDist = self.selected_evaluation(self.alternative_learner, examples, weight) 2441 classifier = self.alternative_learner(examples,weight) 2442 ## probDist = Orange.core.DistributionList() 2443 ## for e in examples: 2444 ## probDist.append(classifier(e,Orange.core.GetProbabilities)) 2445 cl = Orange.core.RuleClassifier_logit(rules, self.min_cl_sig, self.min_beta, examples, weight, self.set_prefix_rules, self.optimize_betas, classifier, probDist) 2446 else: 2447 cl = Orange.core.RuleClassifier_logit(rules, self.min_cl_sig, self.min_beta, examples, weight, self.set_prefix_rules, self.optimize_betas) 2448 2449 ## print "result" 2450 for ri,r in enumerate(cl.rules): 2451 cl.rules[ri].setattr("beta",cl.ruleBetas[ri]) 2452 ## if cl.ruleBetas[ri] > 0: 2453 ## print Orange.classification.rules.ruleToString(r), r.quality, cl.ruleBetas[ri] 2454 cl.all_rules = cl.rules 2455 cl.rules = self.sortRules(cl.rules) 2456 cl.ruleBetas = [r.beta for r in cl.rules] 2457 cl.setattr("data", examples) 2458 return cl 2459 2460 def add_null_rule(self, rules, examples, weight): 2461 for cl in examples.domain.classVar: 2462 tmpRle = Orange.core.Rule() 2463 tmpRle.filter = Orange.core.Filter_values(domain = examples.domain) 2464 tmpRle.parentRule = None 2465 tmpRle.filterAndStore(examples,weight,int(cl)) 2466 tmpRle.quality = tmpRle.classDistribution[int(cl)]/tmpRle.classDistribution.abs 2467 rules.append(tmpRle) 2468 return rules 2469 2470 def sortRules(self, rules): 2471 newRules = Orange.core.RuleList() 2472 foundRule = True 2473 while foundRule: 2474 foundRule = False 2475 bestRule = None 2476 for r in rules: 2477 if r in newRules: 2478 continue 2479 if r.beta < 0.01 and r.beta > 0.01: 2480 continue 2481 if not bestRule: 2482 bestRule = r 2483 foundRule = True 2484 continue 2485 if 
len(r.filter.conditions) < len(bestRule.filter.conditions): 2486 bestRule = r 2487 foundRule = True 2488 continue 2489 if len(r.filter.conditions) == len(bestRule.filter.conditions) and r.beta > bestRule.beta: 2490 bestRule = r 2491 foundRule = True 2492 continue 2493 if bestRule: 2494 newRules.append(bestRule) 2495 return newRules 2496 2497 2498 class CN2UnorderedClassifier(Orange.core.RuleClassifier): 2499 """ Classification from rules as in CN2. """ 2500 def __init__(self, rules, examples, weightID = 0, **argkw): 2501 self.rules = rules 2502 self.examples = examples 2503 self.weightID = weightID 2504 self.prior = Orange.core.Distribution(examples.domain.classVar, examples, weightID) 2505 self.__dict__.update(argkw) 2506 2507 def __call__(self, example, result_type=Orange.core.GetValue, retRules = False): 2508 # iterate through the set of induced rules: self.rules and sum their distributions 2509 ret_dist = self.sum_distributions([r for r in self.rules if r(example)]) 2510 # normalize 2511 a = sum(ret_dist) 2512 for ri, r in enumerate(ret_dist): 2513 ret_dist[ri] = ret_dist[ri]/a 2514 ## ret_dist.normalize() 2515 # return value 2516 if result_type == Orange.core.GetValue: 2517 return ret_dist.modus() 2518 if result_type == Orange.core.GetProbabilities: 2519 return ret_dist 2520 return (ret_dist.modus(),ret_dist) 2521 2522 def sum_distributions(self, rules): 2523 if not rules: 2524 return self.prior 2525 empty_disc = Orange.core.Distribution(rules[0].examples.domain.classVar) 2526 for r in rules: 2527 for i,d in enumerate(r.classDistribution): 2528 empty_disc[i] = empty_disc[i] + d 2529 return empty_disc 2530 2531 def __str__(self): 2532 retStr = "" 2533 for r in self.rules: 2534 retStr += Orange.classification.rules.ruleToString(r)+" "+str(r.classDistribution)+"\n" 2535 return retStr 2536 2537 2538 class RuleClassifier_bestRule(Orange.core.RuleClassifier): 2539 """ A very simple classifier, it takes the best rule of each class and normalizes probabilities. 
""" 2540 def __init__(self, rules, examples, weightID = 0, **argkw): 2541 self.rules = rules 2542 self.examples = examples 2543 self.apriori = Orange.core.Distribution(examples.domain.classVar,examples,weightID) 2544 self.aprioriProb = [a/self.apriori.abs for a in self.apriori] 2545 self.weightID = weightID 2546 self.__dict__.update(argkw) 2547 self.defaultClassIndex = 1 2548 2549 def __call__(self, example, result_type=Orange.core.GetValue, retRules = False): 2550 example = Orange.core.Example(self.examples.domain,example) 2551 tempDist = Orange.core.Distribution(example.domain.classVar) 2552 bestRules = [None]*len(example.domain.classVar.values) 2553 2554 for r in self.rules: 2555 if r(example) and not self.defaultClassIndex == int(r.classifier.defaultVal) and \ 2556 (not bestRules[int(r.classifier.defaultVal)] or r.quality>tempDist[r.classifier.defaultVal]): 2557 tempDist[r.classifier.defaultVal] = r.quality 2558 bestRules[int(r.classifier.defaultVal)] = r 2559 for b in bestRules: 2560 if b: 2561 used = getattr(b,"used",0.0) 2562 b.setattr("used",used+1) 2563 nonCovPriorSum = sum([tempDist[i] == 0. and self.aprioriProb[i] or 0. for i in range(len(self.aprioriProb))]) 2564 if tempDist.abs < 1.: 2565 residue = 1.  
tempDist.abs 2566 for a_i,a in enumerate(self.aprioriProb): 2567 if tempDist[a_i] == 0.: 2568 tempDist[a_i]=self.aprioriProb[a_i]*residue/nonCovPriorSum 2569 finalDist = tempDist #Orange.core.Distribution(example.domain.classVar) 2570 else: 2571 tempDist.normalize() # prior probability 2572 tmpExamples = Orange.core.ExampleTable(self.examples) 2573 for r in bestRules: 2574 if r: 2575 tmpExamples = r.filter(tmpExamples) 2576 tmpDist = Orange.core.Distribution(tmpExamples.domain.classVar,tmpExamples,self.weightID) 2577 tmpDist.normalize() 2578 probs = [0.]*len(self.examples.domain.classVar.values) 2579 for i in range(len(self.examples.domain.classVar.values)): 2580 probs[i] = tmpDist[i]+tempDist[i]*2 2581 finalDist = Orange.core.Distribution(self.examples.domain.classVar) 2582 for cl_i,cl in enumerate(self.examples.domain.classVar): 2583 finalDist[cl] = probs[cl_i] 2584 finalDist.normalize() 2585 2586 if retRules: # Do you want to return rules with classification? 2587 if result_type == Orange.core.GetValue: 2588 return (finalDist.modus(),bestRules) 2589 if result_type == Orange.core.GetProbabilities: 2590 return (finalDist, bestRules) 2591 return (finalDist.modus(),finalDist, bestRules) 2592 if result_type == Orange.core.GetValue: 2593 return finalDist.modus() 2594 if result_type == Orange.core.GetProbabilities: 2595 return finalDist 2596 return (finalDist.modus(),finalDist) 
orange/orngABCN2.py
r7651 r7675 3 3 4 4 5 import operator 6 import random 7 import numpy 8 import math 9 10 # The following is an ugly hack to make the cyclic dependencies work. 11 # Cyclic dependencies are introduced by Orange's __init__.py importing all 12 # its submodules, one of which currently depends on this module. 13 import sys 14 if "Orange" not in sys.modules: 15 del sys.modules['orngABCN2'] 16 import Orange 17 18 import Orange.core 19 import Orange.classification.rules 5 from Orange.classification.rules import DefaultLearner 6 from Orange.classification.rules import ABCN2 7 from Orange.classification.rules import ABCN2Ordered 8 from Orange.classification.rules import ABCN2M 9 from Orange.classification.rules import ABBeamFilter 10 from Orange.classification.rules import ruleCoversArguments 11 from Orange.classification.rules import SelectorAdder 12 from Orange.classification.rules import ArgFilter 13 from Orange.classification.rules import SelectorArgConditions 14 from Orange.classification.rules import CovererAndRemover_Prob 15 from Orange.classification.rules import avg 16 from Orange.classification.rules import var 17 from Orange.classification.rules import perc 18 from Orange.classification.rules import EVDFitter 19 from Orange.classification.rules import CrossValidation 20 from Orange.classification.rules import PILAR 21 from Orange.classification.rules import CN2UnorderedClassifier 20 22 from orngABML import * 21 22 # Default learner  returns #23 # default classifier with pre #24 # defined output class #25 class DefaultLearner(Orange.core.Learner):26 def __init__(self,defaultValue = None):27 self.defaultValue = defaultValue28 def __call__(self,examples,weightID=0):29 return Orange.core.DefaultClassifier(self.defaultValue,defaultDistribution = Orange.core.Distribution(examples.domain.classVar,examples,weightID))30 31 32 # Main ABCN2 class33 class ABCN2(Orange.core.RuleLearner):34 """This is implementation of ABCN2 + EVC as evaluation + LRC classification.35 """36 37 
def __init__(self, argumentID=0, width=5, m=2, opt_reduction=2, nsampling=100, max_rule_complexity=5,38 rule_sig=1.0, att_sig=1.0, postpruning=None, min_quality=0., min_coverage=1, min_improved=1, min_improved_perc=0.0,39 learn_for_class = None, learn_one_rule = False, evd=None, evd_arguments=None, prune_arguments=False, analyse_argument=1,40 alternative_learner = None, min_cl_sig = 0.5, min_beta = 0.0, set_prefix_rules = False, add_sub_rules = False,41 **kwds):42 """43 Parameters:44 General rule learning:45 width ... beam width (default 5)46 learn_for_class ... learner rules for one class? otherwise None47 learn_one_rule ... learn one rule only ?48 analyse_argument ... learner only analyses argument with this index; if set to 1, then it learns normally49 50 Evaluator related:51 m ... mestimate to be corrected with EVC (default 2)52 opt_reduction ... types of EVC correction; 0=no correction, 1=pessimistic, 2=normal (default 2)53 nsampling ... number of samples in estimating extreme value distribution (for EVC) (default 100)54 evd ... pre given extreme value distributions55 evd_arguments ... pre given extreme value distributions for arguments56 57 Rule Validation:58 rule_sig ... minimal rule significance (default 1.0)59 att_sig ... minimal attribute significance in rule (default 1.0)60 max_rule_complexity ... maximum number of conditions in rule (default 5)61 min_coverage ... minimal number of covered examples (default 5)62 63 Probabilistic covering:64 min_improved ... minimal number of examples improved in probabilistic covering (default 1)65 min_improved_perc ... minimal percentage of covered examples that need to be improved (default 0.0)66 67 Classifier (LCR) related:68 add_sub_rules ... add sub rules ? (default False)69 min_cl_sig ... minimal significance of beta in classifier (default 0.5)70 min_beta ... minimal beta value (default 0.0)71 set_prefix_rules ... should ordered prefix rules be added? (default False)72 alternative_learner ... 
use rulelearner as a correction method for other machine learning methods. (default None)73 74 """75 76 77 # argument ID which is passed to abcn2 learner78 self.argumentID = argumentID79 # learn for specific class only?80 self.learn_for_class = learn_for_class81 # only analysing a specific argument or learning all at once82 self.analyse_argument = analyse_argument83 # should we learn only one rule?84 self.learn_one_rule = learn_one_rule85 self.postpruning = postpruning86 # rule finder87 self.ruleFinder = Orange.core.RuleBeamFinder()88 self.ruleFilter = Orange.core.RuleBeamFilter_Width(width=width)89 self.ruleFilter_arguments = ABBeamFilter(width=width)90 if max_rule_complexity  1 < 0:91 max_rule_complexity = 1092 self.ruleFinder.ruleStoppingValidator = Orange.core.RuleValidator_LRS(alpha = 1.0, min_quality = 0., max_rule_complexity = max_rule_complexity  1, min_coverage=min_coverage)93 self.refiner = Orange.core.RuleBeamRefiner_Selector()94 self.refiner_arguments = SelectorAdder(discretizer = Orange.core.EntropyDiscretization(forceAttribute = 1,95 maxNumberOfIntervals = 2))96 self.prune_arguments = prune_arguments97 # evc evaluator98 evdGet = Orange.core.EVDistGetter_Standard()99 self.ruleFinder.evaluator = Orange.core.RuleEvaluator_mEVC(m=m, evDistGetter = evdGet, min_improved = min_improved, min_improved_perc = min_improved_perc)100 self.ruleFinder.evaluator.returnExpectedProb = True101 self.ruleFinder.evaluator.optimismReduction = opt_reduction102 self.ruleFinder.evaluator.ruleAlpha = rule_sig103 self.ruleFinder.evaluator.attributeAlpha = att_sig104 self.ruleFinder.evaluator.validator = Orange.core.RuleValidator_LRS(alpha = 1.0, min_quality = min_quality, min_coverage=min_coverage, max_rule_complexity = max_rule_complexity  1)105 106 # learn stopping criteria107 self.ruleStopping = None108 self.dataStopping = Orange.core.RuleDataStoppingCriteria_NoPositives()109 # evd fitting110 self.evd_creator = EVDFitter(self,n=nsampling)111 self.evd = evd112 
self.evd_arguments = evd_arguments113 # classifier114 self.add_sub_rules = add_sub_rules115 self.classifier = PILAR(alternative_learner = alternative_learner, min_cl_sig = min_cl_sig, min_beta = min_beta, set_prefix_rules = set_prefix_rules)116 # arbitrary parameters117 self.__dict__.update(kwds)118 119 120 def __call__(self, examples, weightID=0):121 # initialize progress bar122 progress=getattr(self,"progressCallback",None)123 if progress:124 progress.start = 0.0125 progress.end = 0.0126 distrib = Orange.core.Distribution(examples.domain.classVar, examples, weightID)127 distrib.normalize()128 129 # we begin with an empty set of rules130 all_rules = Orange.core.RuleList()131 132 # th en, iterate through all classes and learn rule for each class separately133 for cl_i,cl in enumerate(examples.domain.classVar):134 if progress:135 step = distrib[cl] / 2.136 progress.start = progress.end137 progress.end += step138 139 if self.learn_for_class and not self.learn_for_class in [cl,cl_i]:140 continue141 142 # rules for this class only143 rules, arg_rules = Orange.core.RuleList(), Orange.core.RuleList()144 145 # create dichotomous class146 dich_data = self.create_dich_class(examples, cl)147 148 # preparation of the learner (covering, evd, etc.)149 self.prepare_settings(dich_data, weightID, cl_i, progress)150 151 # learn argumented rules first ...152 self.turn_ABML_mode(dich_data, weightID, cl_i)153 # first specialize all unspecialized arguments154 # dich_data = self.specialise_arguments(dich_data, weightID)155 # comment: specialisation of arguments is within learning of an argumented rule;156 # this is now different from the published algorithm157 if progress:158 progress.start = progress.end159 progress.end += step160 161 aes = self.get_argumented_examples(dich_data)162 aes = self.sort_arguments(aes, dich_data)163 while aes:164 if self.analyse_argument > 1 and not dich_data[self.analyse_argument] == aes[0]:165 aes = aes[1:]166 continue167 ae = aes[0]168 rule = 
self.learn_argumented_rule(ae, dich_data, weightID) # target class is always first class (0)169 if not progress:170 print "learned rule", Orange.classification.rules.ruleToString(rule)171 if rule:172 arg_rules.append(rule)173 aes = filter(lambda x: not rule(x), aes)174 else:175 aes = aes[1:]176 if not progress:177 print " arguments finished ... "178 179 # remove all examples covered by rules180 ## for rule in rules:181 ## dich_data = self.remove_covered_examples(rule, dich_data, weightID)182 ## if progress:183 ## progress(self.remaining_probability(dich_data),None)184 185 # learn normal rules on remaining examples186 if self.analyse_argument == 1:187 self.turn_normal_mode(dich_data, weightID, cl_i)188 while dich_data:189 # learn a rule190 rule = self.learn_normal_rule(dich_data, weightID, self.apriori)191 if not rule:192 break193 if not progress:194 print "rule learned: ", Orange.classification.rules.ruleToString(rule), rule.quality195 dich_data = self.remove_covered_examples(rule, dich_data, weightID)196 if progress:197 progress(self.remaining_probability(dich_data),None)198 rules.append(rule)199 if self.learn_one_rule:200 break201 202 for r in arg_rules:203 dich_data = self.remove_covered_examples(r, dich_data, weightID)204 rules.append(r)205 206 # prune unnecessary rules207 rules = self.prune_unnecessary_rules(rules, dich_data, weightID)208 209 if self.add_sub_rules:210 rules = self.add_sub_rules_call(rules, dich_data, weightID)211 212 # restore domain and class in rules, add them to all_rules213 for r in rules:214 all_rules.append(self.change_domain(r, cl, examples, weightID))215 216 if progress:217 progress(1.0,None)218 # create a classifier from all rules219 return self.create_classifier(all_rules, examples, weightID)220 221 def learn_argumented_rule(self, ae, examples, weightID):222 # prepare roots of rules from arguments223 positive_args = self.init_pos_args(ae, examples, weightID)224 if not positive_args: # something wrong225 raise "There is a problem with 
argumented example %s"%str(ae)226 return None227 negative_args = self.init_neg_args(ae, examples, weightID)228 229 # set negative arguments in refiner230 self.ruleFinder.refiner.notAllowedSelectors = negative_args231 self.ruleFinder.refiner.example = ae232 # set arguments to filter233 self.ruleFinder.ruleFilter.setArguments(examples.domain,positive_args)234 235 # learn a rule236 self.ruleFinder.evaluator.bestRule = None237 self.ruleFinder.evaluator.returnBestFuture = True238 self.ruleFinder(examples,weightID,0,positive_args)239 ## self.ruleFinder.evaluator.bestRule.quality = 0.8240 241 # return best rule242 return self.ruleFinder.evaluator.bestRule243 244 def prepare_settings(self, examples, weightID, cl_i, progress):245 # apriori distribution246 self.apriori = Orange.core.Distribution(examples.domain.classVar,examples,weightID)247 248 # prepare covering mechanism249 self.coverAndRemove = CovererAndRemover_Prob(examples, weightID, 0, self.apriori)250 self.ruleFinder.evaluator.probVar = examples.domain.getmeta(self.coverAndRemove.probAttribute)251 252 # compute extreme distributions253 # TODO: why evd and evd_this????254 if self.ruleFinder.evaluator.optimismReduction > 0 and not self.evd:255 self.evd_this = self.evd_creator.computeEVD(examples, weightID, target_class=0, progress = progress)256 if self.evd:257 self.evd_this = self.evd[cl_i]258 259 def turn_ABML_mode(self, examples, weightID, cl_i):260 # evaluator261 if self.ruleFinder.evaluator.optimismReduction > 0 and self.argumentID:262 if self.evd_arguments:263 self.ruleFinder.evaluator.evDistGetter.dists = self.evd_arguments[cl_i]264 else:265 self.ruleFinder.evaluator.evDistGetter.dists = self.evd_this # self.evd_creator.computeEVD_example(examples, weightID, target_class=0)266 # rule refiner267 self.ruleFinder.refiner = self.refiner_arguments268 self.ruleFinder.refiner.argumentID = self.argumentID269 self.ruleFinder.ruleFilter = self.ruleFilter_arguments270 271 def create_dich_class(self, examples, cl):272 """ 
create dichotomous class. """273 (newDomain, targetVal) = createDichotomousClass(examples.domain, examples.domain.classVar, str(cl), negate=0)274 newDomainmetas = newDomain.getmetas()275 newDomain.addmeta(Orange.core.newmetaid(), examples.domain.classVar) # old class as meta276 dichData = examples.select(newDomain)277 if self.argumentID:278 for d in dichData: # remove arguments given to other classes279 if not d.getclass() == targetVal:280 d[self.argumentID] = "?"281 return dichData282 283 def get_argumented_examples(self, examples):284 if not self.argumentID:285 return None286 287 # get argumentated examples288 return ArgumentFilter_hasSpecial()(examples, self.argumentID, targetClass = 0)289 290 def sort_arguments(self, arg_examples, examples):291 if not self.argumentID:292 return None293 evaluateAndSortArguments(examples, self.argumentID)294 if len(arg_examples)>0:295 # sort examples by their arguments quality (using first argument as it has already been sorted)296 sorted = arg_examples.native()297 sorted.sort(lambda x,y: cmp(x[self.argumentID].value.positiveArguments[0].quality,298 y[self.argumentID].value.positiveArguments[0].quality))299 return Orange.core.ExampleTable(examples.domain, sorted)300 else:301 return None302 303 def turn_normal_mode(self, examples, weightID, cl_i):304 # evaluator305 if self.ruleFinder.evaluator.optimismReduction > 0:306 if self.evd:307 self.ruleFinder.evaluator.evDistGetter.dists = self.evd[cl_i]308 else:309 self.ruleFinder.evaluator.evDistGetter.dists = self.evd_this # self.evd_creator.computeEVD(examples, weightID, target_class=0)310 # rule refiner311 self.ruleFinder.refiner = self.refiner312 self.ruleFinder.ruleFilter = self.ruleFilter313 314 def learn_normal_rule(self, examples, weightID, apriori):315 if hasattr(self.ruleFinder.evaluator, "bestRule"):316 self.ruleFinder.evaluator.bestRule = None317 rule = self.ruleFinder(examples,weightID,0,Orange.core.RuleList())318 if hasattr(self.ruleFinder.evaluator, "bestRule") and 
self.ruleFinder.evaluator.returnExpectedProb:319 rule = self.ruleFinder.evaluator.bestRule320 self.ruleFinder.evaluator.bestRule = None321 if self.postpruning:322 rule = self.postpruning(rule,examples,weightID,0, aprior)323 return rule324 325 def remove_covered_examples(self, rule, examples, weightID):326 nexamples, nweight = self.coverAndRemove(rule,examples,weightID,0)327 return nexamples328 329 330 def prune_unnecessary_rules(self, rules, examples, weightID):331 return self.coverAndRemove.getBestRules(rules,examples,weightID)332 333 def change_domain(self, rule, cl, examples, weightID):334 rule.examples = rule.examples.select(examples.domain)335 rule.classDistribution = Orange.core.Distribution(rule.examples.domain.classVar,rule.examples,weightID) # adapt distribution336 rule.classifier = Orange.core.DefaultClassifier(cl) # adapt classifier337 rule.filter = Orange.core.Filter_values(domain = examples.domain,338 conditions = rule.filter.conditions)339 if hasattr(rule, "learner") and hasattr(rule.learner, "arg_example"):340 rule.learner.arg_example = Orange.core.Example(examples.domain, rule.learner.arg_example)341 return rule342 343 def create_classifier(self, rules, examples, weightID):344 return self.classifier(rules, examples, weightID)345 346 def add_sub_rules_call(self, rules, examples, weightID):347 apriori = Orange.core.Distribution(examples.domain.classVar,examples,weightID)348 newRules = Orange.core.RuleList()349 for r in rules:350 newRules.append(r)351 352 # loop through rules353 for r in rules:354 tmpList = Orange.core.RuleList()355 tmpRle = r.clone()356 tmpRle.filter.conditions = r.filter.conditions[:r.requiredConditions] # do not split argument357 tmpRle.parentRule = None358 tmpRle.filterAndStore(examples,weightID,r.classifier.defaultVal)359 tmpRle.complexity = 0360 tmpList.append(tmpRle)361 while tmpList and len(tmpList[0].filter.conditions) <= len(r.filter.conditions):362 tmpList2 = Orange.core.RuleList()363 for tmpRule in tmpList:364 # evaluate 
tmpRule365 oldREP = self.ruleFinder.evaluator.returnExpectedProb366 self.ruleFinder.evaluator.returnExpectedProb = False367 tmpRule.quality = self.ruleFinder.evaluator(tmpRule,examples,weightID,r.classifier.defaultVal,apriori)368 self.ruleFinder.evaluator.returnExpectedProb = oldREP369 # if rule not in rules already, add it to the list370 if not True in [Orange.classification.rules.rules_equal(ri,tmpRule) for ri in newRules] and len(tmpRule.filter.conditions)>0 and tmpRule.quality > apriori[r.classifier.defaultVal]/apriori.abs:371 newRules.append(tmpRule)372 # create new tmpRules, set parent Rule, append them to tmpList2373 if not True in [Orange.classification.rules.rules_equal(ri,tmpRule) for ri in newRules]:374 for c in r.filter.conditions:375 tmpRule2 = tmpRule.clone()376 tmpRule2.parentRule = tmpRule377 tmpRule2.filter.conditions.append(c)378 tmpRule2.filterAndStore(examples,weightID,r.classifier.defaultVal)379 tmpRule2.complexity += 1380 if tmpRule2.classDistribution.abs < tmpRule.classDistribution.abs:381 tmpList2.append(tmpRule2)382 tmpList = tmpList2383 return newRules384 385 386 def init_pos_args(self, ae, examples, weightID):387 pos_args = Orange.core.RuleList()388 # prepare arguments389 for p in ae[self.argumentID].value.positiveArguments:390 new_arg = Orange.core.Rule(filter=ArgFilter(argumentID = self.argumentID,391 filter = self.newFilter_values(p.filter)),392 complexity = 0)393 new_arg.valuesFilter = new_arg.filter.filter394 pos_args.append(new_arg)395 396 397 if hasattr(self.ruleFinder.evaluator, "returnExpectedProb"):398 old_exp = self.ruleFinder.evaluator.returnExpectedProb399 self.ruleFinder.evaluator.returnExpectedProb = False400 401 # argument pruning (all or just unfinished arguments)402 # if pruning is chosen, then prune arguments if possible403 for p in pos_args:404 p.filterAndStore(examples, weightID, 0)405 # pruning on: we check on all conditions and take only best406 if self.prune_arguments:407 allowed_conditions = [c for c in 
p.filter.conditions]408 pruned_conditions = self.prune_arg_conditions(ae, allowed_conditions, examples, weightID)409 p.filter.conditions = pruned_conditions410 else: # prune only unspecified conditions411 spec_conditions = [c for c in p.filter.conditions if not c.unspecialized_condition]412 unspec_conditions = [c for c in p.filter.conditions if c.unspecialized_condition]413 # let rule cover now all examples filtered by specified conditions414 p.filter.conditions = spec_conditions415 p.filterAndStore(examples, weightID, 0)416 pruned_conditions = self.prune_arg_conditions(ae, unspec_conditions, p.examples, p.weightID)417 p.filter.conditions.extend(pruned_conditions)418 p.filter.filter.conditions.extend(pruned_conditions)419 # if argument does not contain all unspecialized reasons, add those reasons with minimum values420 at_oper_pairs = [(c.position, c.oper) for c in p.filter.conditions if type(c) == Orange.core.ValueFilter_continuous]421 for u in unspec_conditions:422 if not (u.position, u.oper) in at_oper_pairs:423 # find minimum value424 u.ref = min([float(e[u.position])10. 
for e in p.examples])425 p.filter.conditions.append(u)426 p.filter.filter.conditions.append(u)427 428 429 # set parameters to arguments430 for p_i,p in enumerate(pos_args):431 p.filterAndStore(examples,weightID,0)432 p.filter.domain = examples.domain433 if not p.learner:434 p.learner = DefaultLearner(defaultValue=ae.getclass())435 p.classifier = p.learner(p.examples, p.weightID)436 p.baseDist = p.classDistribution437 p.requiredConditions = len(p.filter.conditions)438 p.learner.setattr("arg_length", len(p.filter.conditions))439 p.learner.setattr("arg_example", ae)440 p.complexity = len(p.filter.conditions)441 442 if hasattr(self.ruleFinder.evaluator, "returnExpectedProb"):443 self.ruleFinder.evaluator.returnExpectedProb = old_exp444 445 return pos_args446 447 def newFilter_values(self, filter):448 newFilter = Orange.core.Filter_values()449 newFilter.conditions = filter.conditions[:]450 newFilter.domain = filter.domain451 newFilter.negate = filter.negate452 newFilter.conjunction = filter.conjunction453 return newFilter454 455 def init_neg_args(self, ae, examples, weightID):456 return ae[self.argumentID].value.negativeArguments457 458 def remaining_probability(self, examples):459 return self.coverAndRemove.covered_percentage(examples)460 461 def prune_arg_conditions(self, crit_example, allowed_conditions, examples, weightID):462 if not allowed_conditions:463 return []464 cn2_learner = Orange.classification.rules.CN2UnorderedLearner()465 cn2_learner.ruleFinder = Orange.core.RuleBeamFinder()466 cn2_learner.ruleFinder.refiner = SelectorArgConditions(crit_example, allowed_conditions)467 cn2_learner.ruleFinder.evaluator = Orange.classification.rules.MEstimate(self.ruleFinder.evaluator.m)468 rule = cn2_learner.ruleFinder(examples,weightID,0,Orange.core.RuleList())469 return rule.filter.conditions470 471 472 class ABCN2Ordered(ABCN2):473 """ Rules learned by ABCN2 are ordered and used as a decision list. 
"""474 def __init__(self, argumentID=0, **kwds):475 ABCN2.__init__(self, argumentID=argumentID, **kwds)476 self.classifier.set_prefix_rules = True477 self.classifier.optimize_betas = False478 479 class ABCN2M(ABCN2):480 """ Argument based rule learning with mestimate as evaluation function. """481 def __init__(self, argumentID=0, **kwds):482 ABCN2.__init__(self, argumentID=argumentID, **kwds)483 self.opt_reduction = 0484 485 486 # *********************** #487 # Argument based covering #488 # *********************** #489 490 class ABBeamFilter(Orange.core.RuleBeamFilter):491 """ ABBeamFilter: Filters beam;492  leaves first N rules (by quality)493  leaves first N rules that have only of arguments in condition part494 """495 def __init__(self,width=5):496 self.width=width497 self.pArgs=None498 499 def __call__(self,rulesStar,examples,weightID):500 newStar=Orange.core.RuleList()501 rulesStar.sort(lambda x,y: cmp(x.quality,y.quality))502 argsNum=0503 for r_i,r in enumerate(rulesStar):504 if r_i<self.width: # either is one of best "width" rules505 newStar.append(r)506 elif self.onlyPositives(r):507 if argsNum<self.width:508 newStar.append(r)509 argsNum+=1510 return newStar511 512 def setArguments(self,domain,positiveArguments):513 self.pArgs = positiveArguments514 self.domain = domain515 self.argTab = [0]*len(self.domain.attributes)516 for arg in self.pArgs:517 for cond in arg.filter.conditions:518 self.argTab[cond.position]=1519 520 def onlyPositives(self,rule):521 if not self.pArgs:522 return False523 524 ruleTab=[0]*len(self.domain.attributes)525 for cond in rule.filter.conditions:526 ruleTab[cond.position]=1527 return map(operator.or_,ruleTab,self.argTab)==self.argTab528 529 530 class ruleCoversArguments:531 """ Class determines if rule covers one out of a set of arguments. 
"""532 def __init__(self, arguments):533 self.arguments = arguments534 self.indices = []535 for a in self.arguments:536 indNA = getattr(a.filter,"indices",None)537 if not indNA:538 a.filter.setattr("indices", ruleCoversArguments.filterIndices(a.filter))539 self.indices.append(a.filter.indices)540 541 def __call__(self, rule):542 if not self.indices:543 return False544 if not getattr(rule.filter,"indices",None):545 rule.filter.indices = ruleCoversArguments.filterIndices(rule.filter)546 for index in self.indices:547 if map(operator.or_,rule.filter.indices,index) == rule.filter.indices:548 return True549 return False550 551 def filterIndices(filter):552 if not filter.domain:553 return []554 ind = [0]*len(filter.domain.attributes)555 for c in filter.conditions:556 ind[c.position]=operator.or_(ind[c.position],557 ruleCoversArguments.conditionIndex(c))558 return ind559 filterIndices = staticmethod(filterIndices)560 561 def conditionIndex(c):562 if type(c) == Orange.core.ValueFilter_continuous:563 if (c.oper == Orange.core.ValueFilter_continuous.GreaterEqual or564 c.oper == Orange.core.ValueFilter_continuous.Greater):565 return 5# 0101566 elif (c.oper == Orange.core.ValueFilter_continuous.LessEqual or567 c.oper == Orange.core.ValueFilter_continuous.Less):568 return 3 # 0011569 else:570 return c.oper571 else:572 return 1 # 0001573 conditionIndex = staticmethod(conditionIndex)574 575 def oneSelectorToCover(ruleIndices, argIndices):576 at, type = 1, 0577 for r_i, ind in enumerate(ruleIndices):578 if not argIndices[r_i]:579 continue580 if at>1 and not ind == argIndices[r_i]: # need two changes581 return (1,0)582 if not ind == argIndices[r_i]:583 if argIndices[r_i] in [1,3,5]:584 at,type=r_i,argIndices[r_i]585 if argIndices[r_i]==6:586 if ind==3:587 at,type=r_i,5588 if ind==5:589 at,type=r_i,3590 return at,type591 oneSelectorToCover = staticmethod(oneSelectorToCover)592 593 class SelectorAdder(Orange.core.RuleBeamRefiner):594 """ Selector adder, this function is a refiner 
function:595  refined rules are not consistent with any of negative arguments. """596 def __init__(self, example=None, notAllowedSelectors=[], argumentID = None,597 discretizer = Orange.core.EntropyDiscretization(forceAttribute=True)):598 # required values  needed values of attributes599 self.example = example600 self.argumentID = argumentID601 self.notAllowedSelectors = notAllowedSelectors602 self.discretizer = discretizer603 604 def __call__(self, oldRule, data, weightID, targetClass=1):605 inNotAllowedSelectors = ruleCoversArguments(self.notAllowedSelectors)606 newRules = Orange.core.RuleList()607 608 # get positive indices (selectors already in the rule)609 indices = getattr(oldRule.filter,"indices",None)610 if not indices:611 indices = ruleCoversArguments.filterIndices(oldRule.filter)612 oldRule.filter.setattr("indices",indices)613 614 # get negative indices (selectors that should not be in the rule)615 negativeIndices = [0]*len(data.domain.attributes)616 for nA in self.notAllowedSelectors:617 #print indices, nA.filter.indices618 at_i,type_na = ruleCoversArguments.oneSelectorToCover(indices, nA.filter.indices)619 if at_i>1:620 negativeIndices[at_i] = operator.or_(negativeIndices[at_i],type_na)621 622 #iterate through indices = attributes623 for i,ind in enumerate(indices):624 if not self.example[i] or self.example[i].isSpecial():625 continue626 if ind == 1:627 continue628 if data.domain[i].varType == Orange.core.VarTypes.Discrete and not negativeIndices[i]==1: # DISCRETE attribute629 if self.example:630 values = [self.example[i]]631 else:632 values = data.domain[i].values633 for v in values:634 tempRule = oldRule.clone()635 tempRule.filter.conditions.append(Orange.core.ValueFilter_discrete(position = i,636 values = [Orange.core.Value(data.domain[i],v)],637 acceptSpecial=0))638 tempRule.complexity += 1639 tempRule.filter.indices[i] = 1 # 1 stands for discrete attribute (see ruleCoversArguments.conditionIndex)640 tempRule.filterAndStore(oldRule.examples, 
oldRule.weightID, targetClass)641 if len(tempRule.examples)<len(oldRule.examples):642 newRules.append(tempRule)643 elif data.domain[i].varType == Orange.core.VarTypes.Continuous and not negativeIndices[i]==7: # CONTINUOUS attribute644 try:645 at = data.domain[i]646 at_d = self.discretizer(at,oldRule.examples)647 except:648 continue # discretization failed !649 # If discretization makes sense? then:650 if len(at_d.values)>1:651 for p in at_d.getValueFrom.transformer.points:652 #LESS653 if not negativeIndices[i]==3:654 tempRule = self.getTempRule(oldRule,i,Orange.core.ValueFilter_continuous.LessEqual,p,targetClass,3)655 if len(tempRule.examples)<len(oldRule.examples) and self.example[i]<=p:# and not inNotAllowedSelectors(tempRule):656 newRules.append(tempRule)657 #GREATER658 if not negativeIndices[i]==5:659 tempRule = self.getTempRule(oldRule,i,Orange.core.ValueFilter_continuous.Greater,p,targetClass,5)660 if len(tempRule.examples)<len(oldRule.examples) and self.example[i]>p:# and not inNotAllowedSelectors(tempRule):661 newRules.append(tempRule)662 for r in newRules:663 r.parentRule = oldRule664 r.valuesFilter = r.filter.filter665 return newRules666 667 def getTempRule(self,oldRule,pos,oper,ref,targetClass,atIndex):668 tempRule = oldRule.clone()669 670 tempRule.filter.conditions.append(Orange.core.ValueFilter_continuous(position=pos,671 oper=oper,672 ref=ref,673 acceptSpecial=0))674 tempRule.complexity += 1675 tempRule.filter.indices[pos] = operator.or_(tempRule.filter.indices[pos],atIndex) # from ruleCoversArguments.conditionIndex676 tempRule.filterAndStore(oldRule.examples,tempRule.weightID,targetClass)677 return tempRule678 679 def setCondition(self, oldRule, targetClass, ci, condition):680 tempRule = oldRule.clone()681 tempRule.filter.conditions[ci] = condition682 tempRule.filter.conditions[ci].setattr("specialized",1)683 tempRule.filterAndStore(oldRule.examples,oldRule.weightID,targetClass)684 return tempRule685 686 687 # This filter is the ugliest code ever! 
Problem is with Orange, I had some problems with inheriting deepCopy688 # I should take another look at it.689 class ArgFilter(Orange.core.Filter):690 """ This class implements ABcovering principle. """691 def __init__(self, argumentID=None, filter = Orange.core.Filter_values()):692 self.filter = filter693 self.indices = getattr(filter,"indices",[])694 if not self.indices and len(filter.conditions)>0:695 self.indices = ruleCoversArguments.filterIndices(filter)696 self.argumentID = argumentID697 self.debug = 0698 self.domain = self.filter.domain699 self.conditions = filter.conditions700 701 def condIn(self,cond): # is condition in the filter?702 condInd = ruleCoversArguments.conditionIndex(cond)703 if operator.or_(condInd,self.indices[cond.position]) == self.indices[cond.position]:704 return True705 return False706 707 def __call__(self,example):708 ## print "in", self.filter(example), self.filter.conditions[0](example)709 ## print self.filter.conditions[1].values710 if self.filter(example):711 try:712 if example[self.argumentID].value and len(example[self.argumentID].value.positiveArguments)>0: # example has positive arguments713 # conditions should cover at least one of the positive arguments714 oneArgCovered = False715 for pA in example[self.argumentID].value.positiveArguments:716 argCovered = [self.condIn(c) for c in pA.filter.conditions]717 oneArgCovered = oneArgCovered or len(argCovered) == sum(argCovered) #argCovered718 if oneArgCovered:719 break720 if not oneArgCovered:721 return False722 if example[self.argumentID].value and len(example[self.argumentID].value.negativeArguments)>0: # example has negative arguments723 # condition should not cover neither of negative arguments724 for pN in example[self.argumentID].value.negativeArguments:725 argCovered = [self.condIn(c) for c in pN.filter.conditions]726 if len(argCovered)==sum(argCovered):727 return False728 except:729 return True730 return True731 else:732 return False733 734 def 
__setattr__(self,name,obj):735 self.__dict__[name]=obj736 self.filter.setattr(name,obj)737 738 def deepCopy(self):739 newFilter = ArgFilter(argumentID=self.argumentID)740 newFilter.filter = Orange.core.Filter_values() #self.filter.deepCopy()741 newFilter.filter.conditions = self.filter.conditions[:]742 newFilter.domain = self.filter.domain743 newFilter.negate = self.filter.negate744 newFilter.conjunction = self.filter.conjunction745 newFilter.domain = self.filter.domain746 newFilter.conditions = newFilter.filter.conditions747 newFilter.indices = self.indices[:]748 if getattr(self,"candidateValues",None):749 newFilter.candidateValues = self.candidateValues[:]750 return newFilter751 752 753 class SelectorArgConditions(Orange.core.RuleBeamRefiner):754 """ Selector adder, this function is a refiner function:755  refined rules are not consistent with any of negative arguments. """756 def __init__(self, example, allowed_selectors):757 # required values  needed values of attributes758 self.example = example759 self.allowed_selectors = allowed_selectors760 761 def __call__(self, oldRule, data, weightID, targetClass=1):762 if len(oldRule.filter.conditions) >= len(self.allowed_selectors):763 return Orange.core.RuleList()764 newRules = Orange.core.RuleList()765 for c in self.allowed_selectors:766 # normal condition767 if not c.unspecialized_condition:768 tempRule = oldRule.clone()769 tempRule.filter.conditions.append(c)770 tempRule.filterAndStore(oldRule.examples, oldRule.weightID, targetClass)771 if len(tempRule.examples)<len(oldRule.examples):772 newRules.append(tempRule)773 # unspecified condition774 else:775 # find all possible example values776 vals = {}777 for e in oldRule.examples:778 if not e[c.position].isSpecial():779 vals[str(e[c.position])] = 1780 values = vals.keys()781 # for each value make a condition782 for v in values:783 tempRule = oldRule.clone()784 tempRule.filter.conditions.append(Orange.core.ValueFilter_continuous(position=c.position,785 oper=c.oper,786 
ref=float(v),787 acceptSpecial=0))788 if tempRule(self.example):789 tempRule.filterAndStore(oldRule.examples, oldRule.weightID, targetClass)790 if len(tempRule.examples)<len(oldRule.examples):791 newRules.append(tempRule)792 ## print " NEW RULES "793 ## for r in newRules:794 ## print Orange.classification.rules.ruleToString(r)795 for r in newRules:796 r.parentRule = oldRule797 ## print Orange.classification.rules.ruleToString(r)798 return newRules799 800 801 # ********************** #802 # Probabilistic covering #803 # ********************** #804 805 class CovererAndRemover_Prob(Orange.core.RuleCovererAndRemover):806 """ This class impements probabilistic covering. """807 808 def __init__(self, examples, weightID, targetClass, apriori):809 self.bestRule = [None]*len(examples)810 self.probAttribute = Orange.core.newmetaid()811 self.aprioriProb = apriori[targetClass]/apriori.abs812 examples.addMetaAttribute(self.probAttribute, self.aprioriProb)813 examples.domain.addmeta(self.probAttribute, Orange.core.FloatVariable("Probs"))814 815 def getBestRules(self, currentRules, examples, weightID):816 bestRules = Orange.core.RuleList()817 ## for r in currentRules:818 ## if hasattr(r.learner, "argumentRule") and not Orange.classification.rules.rule_in_set(r,bestRules):819 ## bestRules.append(r)820 for r_i,r in enumerate(self.bestRule):821 if r and not Orange.classification.rules.rule_in_set(r,bestRules) and int(examples[r_i].getclass())==int(r.classifier.defaultValue):822 bestRules.append(r)823 return bestRules824 825 def __call__(self, rule, examples, weights, targetClass):826 if hasattr(rule, "learner") and hasattr(rule.learner, "arg_example"):827 example = rule.learner.arg_example828 else:829 example = None830 for ei, e in enumerate(examples):831 ## if e == example:832 ## e[self.probAttribute] = 1.0833 ## self.bestRule[ei]=rule834 if example and not (hasattr(self.bestRule[ei], "learner") and hasattr(self.bestRule[ei].learner, "arg_example")):835 can_be_worst = True836 
else:837 can_be_worst = False838 if can_be_worst and rule(e) and rule.quality>(e[self.probAttribute]0.01):839 e[self.probAttribute] = rule.quality+0.001 # 0.001 is added to avoid numerical errors840 self.bestRule[ei]=rule841 elif rule(e) and rule.quality>e[self.probAttribute]:842 e[self.probAttribute] = rule.quality+0.001 # 0.001 is added to avoid numerical errors843 self.bestRule[ei]=rule844 return (examples,weights)845 846 def covered_percentage(self, examples):847 p = 0.0848 for ei, e in enumerate(examples):849 p += (e[self.probAttribute]  self.aprioriProb)/(1.0self.aprioriProb)850 return p/len(examples)851 852 853 # **************************************** #854 # Estimation of extreme value distribution #855 # **************************************** #856 857 # Miscellaneous  utility functions858 def avg(l):859 return sum(l)/len(l) if l else 0.860 861 def var(l):862 if len(l)<2:863 return 0.864 av = avg(l)865 return sum([math.pow(liav,2) for li in l])/(len(l)1)866 867 def perc(l,p):868 l.sort()869 return l[int(math.floor(p*len(l)))]870 871 class EVDFitter:872 """ Randomizes a dataset and fits an extreme value distribution onto it. 
"""873 874 def __init__(self, learner, n=200, randomseed=100):875 self.learner = learner876 self.n = n877 self.randomseed = randomseed878 879 def createRandomDataSet(self, data):880 newData = Orange.core.ExampleTable(data)881 # shuffle data882 cl_num = newData.toNumpy("C")883 random.shuffle(cl_num[0][:,0])884 clData = Orange.core.ExampleTable(Orange.core.Domain([newData.domain.classVar]),cl_num[0])885 for d_i,d in enumerate(newData):886 d[newData.domain.classVar] = clData[d_i][newData.domain.classVar]887 return newData888 889 def createEVDistList(self, evdList):890 l = Orange.core.EVDistList()891 for el in evdList:892 l.append(Orange.core.EVDist(mu=el[0],beta=el[1],percentiles=el[2]))893 return l894 895 # estimated fisher tippett parameters for a set of values given in vals list (+ deciles)896 def compParameters(self, vals, oldMi=0.5,oldBeta=1.1):897 # compute percentiles898 vals.sort()899 N = len(vals)900 percs = [avg(vals[int(float(N)*i/10):int(float(N)*(i+1)/10)]) for i in range(10)]901 if N<10:902 return oldMi, oldBeta, percs903 beta = min(2.0, max(oldBeta, math.sqrt(6*var(vals)/math.pow(math.pi,2))))904 mi = max(oldMi,percs[1]+beta*math.log(math.log(0.95)))905 return mi, beta, percs906 907 def prepare_learner(self):908 self.oldStopper = self.learner.ruleFinder.ruleStoppingValidator909 self.evaluator = self.learner.ruleFinder.evaluator910 self.refiner = self.learner.ruleFinder.refiner911 self.validator = self.learner.ruleFinder.validator912 self.ruleFilter = self.learner.ruleFinder.ruleFilter913 self.learner.ruleFinder.validator = None914 self.learner.ruleFinder.evaluator = Orange.core.RuleEvaluator_LRS()915 self.learner.ruleFinder.evaluator.storeRules = True916 self.learner.ruleFinder.ruleStoppingValidator = Orange.core.RuleValidator_LRS(alpha=1.0)917 self.learner.ruleFinder.ruleStoppingValidator.max_rule_complexity = 0918 self.learner.ruleFinder.refiner = Orange.core.RuleBeamRefiner_Selector()919 self.learner.ruleFinder.ruleFilter = 
Orange.core.RuleBeamFilter_Width(width = 1)920 921 922 def restore_learner(self):923 self.learner.ruleFinder.evaluator = self.evaluator924 self.learner.ruleFinder.ruleStoppingValidator = self.oldStopper925 self.learner.ruleFinder.refiner = self.refiner926 self.learner.ruleFinder.validator = self.validator927 self.learner.ruleFinder.ruleFilter = self.ruleFilter928 929 def computeEVD(self, data, weightID=0, target_class=0, progress=None):930 # initialize random seed to make experiments repeatable931 random.seed(self.randomseed)932 933 # prepare learned for distribution computation934 self.prepare_learner()935 936 # loop through N (sampling repetitions)937 extremeDists=[(0, 1, [])]938 self.learner.ruleFinder.ruleStoppingValidator.max_rule_complexity = self.oldStopper.max_rule_complexity939 maxVals = [[] for l in range(self.oldStopper.max_rule_complexity)]940 for d_i in range(self.n):941 if not progress:942 print d_i,943 else:944 progress(float(d_i)/self.n, None)945 # create data set (remove and randomize)946 tempData = self.createRandomDataSet(data)947 self.learner.ruleFinder.evaluator.rules = Orange.core.RuleList()948 # Next, learn a rule949 self.learner.ruleFinder(tempData,weightID,target_class, Orange.core.RuleList())950 for l in range(self.oldStopper.max_rule_complexity):951 qs = [r.quality for r in self.learner.ruleFinder.evaluator.rules if r.complexity == l+1]952 if qs:953 maxVals[l].append(max(qs))954 else:955 maxVals[l].append(0)956 957 mu, beta = 1.0, 1.0958 for mi,m in enumerate(maxVals):959 mu, beta, perc = self.compParameters(m,mu,beta)960 extremeDists.append((mu, beta, perc))961 extremeDists.extend([(0,1,[])]*(mi))962 963 self.restore_learner()964 return self.createEVDistList(extremeDists)965 966 # ************************* #967 # Rule based classification #968 # ************************* #969 970 class CrossValidation:971 def __init__(self, folds=5, randomGenerator = 150):972 self.folds = folds973 self.randomGenerator = randomGenerator974 975 def 
__call__(self, learner, examples, weight):976 res = orngTest.crossValidation([learner], (examples, weight), folds = self.folds, randomGenerator = self.randomGenerator)977 return self.get_prob_from_res(res, examples)978 979 def get_prob_from_res(self, res, examples):980 probDist = Orange.core.DistributionList()981 for tex in res.results:982 d = Orange.core.Distribution(examples.domain.classVar)983 for di in range(len(d)):984 d[di] = tex.probabilities[0][di]985 probDist.append(d)986 return probDist987 988 class PILAR:989 """ PILAR (Probabilistic improvement of learning algorithms with rules) """990 def __init__(self, alternative_learner = None, min_cl_sig = 0.5, min_beta = 0.0, set_prefix_rules = False, optimize_betas = True):991 self.alternative_learner = alternative_learner992 self.min_cl_sig = min_cl_sig993 self.min_beta = min_beta994 self.set_prefix_rules = set_prefix_rules995 self.optimize_betas = optimize_betas996 self.selected_evaluation = CrossValidation(folds=5)997 998 def __call__(self, rules, examples, weight=0):999 rules = self.add_null_rule(rules, examples, weight)1000 if self.alternative_learner:1001 probDist = self.selected_evaluation(self.alternative_learner, examples, weight)1002 classifier = self.alternative_learner(examples,weight)1003 ## probDist = Orange.core.DistributionList()1004 ## for e in examples:1005 ## probDist.append(classifier(e,Orange.core.GetProbabilities))1006 cl = Orange.core.RuleClassifier_logit(rules, self.min_cl_sig, self.min_beta, examples, weight, self.set_prefix_rules, self.optimize_betas, classifier, probDist)1007 else:1008 cl = Orange.core.RuleClassifier_logit(rules, self.min_cl_sig, self.min_beta, examples, weight, self.set_prefix_rules, self.optimize_betas)1009 1010 ## print "result"1011 for ri,r in enumerate(cl.rules):1012 cl.rules[ri].setattr("beta",cl.ruleBetas[ri])1013 ## if cl.ruleBetas[ri] > 0:1014 ## print Orange.classification.rules.ruleToString(r), r.quality, cl.ruleBetas[ri]1015 cl.all_rules = cl.rules1016 
cl.rules = self.sortRules(cl.rules)1017 cl.ruleBetas = [r.beta for r in cl.rules]1018 cl.setattr("data", examples)1019 return cl1020 1021 def add_null_rule(self, rules, examples, weight):1022 for cl in examples.domain.classVar:1023 tmpRle = Orange.core.Rule()1024 tmpRle.filter = Orange.core.Filter_values(domain = examples.domain)1025 tmpRle.parentRule = None1026 tmpRle.filterAndStore(examples,weight,int(cl))1027 tmpRle.quality = tmpRle.classDistribution[int(cl)]/tmpRle.classDistribution.abs1028 rules.append(tmpRle)1029 return rules1030 1031 def sortRules(self, rules):1032 newRules = Orange.core.RuleList()1033 foundRule = True1034 while foundRule:1035 foundRule = False1036 bestRule = None1037 for r in rules:1038 if r in newRules:1039 continue1040 if r.beta < 0.01 and r.beta > 0.01:1041 continue1042 if not bestRule:1043 bestRule = r1044 foundRule = True1045 continue1046 if len(r.filter.conditions) < len(bestRule.filter.conditions):1047 bestRule = r1048 foundRule = True1049 continue1050 if len(r.filter.conditions) == len(bestRule.filter.conditions) and r.beta > bestRule.beta:1051 bestRule = r1052 foundRule = True1053 continue1054 if bestRule:1055 newRules.append(bestRule)1056 return newRules1057 1058 1059 class CN2UnorderedClassifier(Orange.core.RuleClassifier):1060 """ Classification from rules as in CN2. 
"""1061 def __init__(self, rules, examples, weightID = 0, **argkw):1062 self.rules = rules1063 self.examples = examples1064 self.weightID = weightID1065 self.prior = Orange.core.Distribution(examples.domain.classVar, examples, weightID)1066 self.__dict__.update(argkw)1067 1068 def __call__(self, example, result_type=Orange.core.GetValue, retRules = False):1069 # iterate through the set of induced rules: self.rules and sum their distributions1070 ret_dist = self.sum_distributions([r for r in self.rules if r(example)])1071 # normalize1072 a = sum(ret_dist)1073 for ri, r in enumerate(ret_dist):1074 ret_dist[ri] = ret_dist[ri]/a1075 ## ret_dist.normalize()1076 # return value1077 if result_type == Orange.core.GetValue:1078 return ret_dist.modus()1079 if result_type == Orange.core.GetProbabilities:1080 return ret_dist1081 return (ret_dist.modus(),ret_dist)1082 1083 def sum_distributions(self, rules):1084 if not rules:1085 return self.prior1086 empty_disc = Orange.core.Distribution(rules[0].examples.domain.classVar)1087 for r in rules:1088 for i,d in enumerate(r.classDistribution):1089 empty_disc[i] = empty_disc[i] + d1090 return empty_disc1091 1092 def __str__(self):1093 retStr = ""1094 for r in self.rules:1095 retStr += Orange.classification.rules.ruleToString(r)+" "+str(r.classDistribution)+"\n"1096 return retStr1097 1098 1099 class RuleClassifier_bestRule(Orange.core.RuleClassifier):1100 """ A very simple classifier, it takes the best rule of each class and normalizes probabilities. 
"""1101 def __init__(self, rules, examples, weightID = 0, **argkw):1102 self.rules = rules1103 self.examples = examples1104 self.apriori = Orange.core.Distribution(examples.domain.classVar,examples,weightID)1105 self.aprioriProb = [a/self.apriori.abs for a in self.apriori]1106 self.weightID = weightID1107 self.__dict__.update(argkw)1108 self.defaultClassIndex = 11109 1110 def __call__(self, example, result_type=Orange.core.GetValue, retRules = False):1111 example = Orange.core.Example(self.examples.domain,example)1112 tempDist = Orange.core.Distribution(example.domain.classVar)1113 bestRules = [None]*len(example.domain.classVar.values)1114 1115 for r in self.rules:1116 if r(example) and not self.defaultClassIndex == int(r.classifier.defaultVal) and \1117 (not bestRules[int(r.classifier.defaultVal)] or r.quality>tempDist[r.classifier.defaultVal]):1118 tempDist[r.classifier.defaultVal] = r.quality1119 bestRules[int(r.classifier.defaultVal)] = r1120 for b in bestRules:1121 if b:1122 used = getattr(b,"used",0.0)1123 b.setattr("used",used+1)1124 nonCovPriorSum = sum([tempDist[i] == 0. and self.aprioriProb[i] or 0. for i in range(len(self.aprioriProb))])1125 if tempDist.abs < 1.:1126 residue = 1.  
tempDist.abs1127 for a_i,a in enumerate(self.aprioriProb):1128 if tempDist[a_i] == 0.:1129 tempDist[a_i]=self.aprioriProb[a_i]*residue/nonCovPriorSum1130 finalDist = tempDist #Orange.core.Distribution(example.domain.classVar)1131 else:1132 tempDist.normalize() # prior probability1133 tmpExamples = Orange.core.ExampleTable(self.examples)1134 for r in bestRules:1135 if r:1136 tmpExamples = r.filter(tmpExamples)1137 tmpDist = Orange.core.Distribution(tmpExamples.domain.classVar,tmpExamples,self.weightID)1138 tmpDist.normalize()1139 probs = [0.]*len(self.examples.domain.classVar.values)1140 for i in range(len(self.examples.domain.classVar.values)):1141 probs[i] = tmpDist[i]+tempDist[i]*21142 finalDist = Orange.core.Distribution(self.examples.domain.classVar)1143 for cl_i,cl in enumerate(self.examples.domain.classVar):1144 finalDist[cl] = probs[cl_i]1145 finalDist.normalize()1146 1147 if retRules: # Do you want to return rules with classification?1148 if result_type == Orange.core.GetValue:1149 return (finalDist.modus(),bestRules)1150 if result_type == Orange.core.GetProbabilities:1151 return (finalDist, bestRules)1152 return (finalDist.modus(),finalDist, bestRules)1153 if result_type == Orange.core.GetValue:1154 return finalDist.modus()1155 if result_type == Orange.core.GetProbabilities:1156 return finalDist1157 return (finalDist.modus(),finalDist)1158 1159 1160
Note: See TracChangeset
for help on using the changeset viewer.