Changeset 7403:2bcf352ae815 in orange


Ignore:
Timestamp:
02/04/11 10:59:45 (3 years ago)
Author:
crt <crtomir.gorup@…>
Branch:
default
Convert:
f283be58134d43f8c8f31512afc031d8e8f6a879
Message:

Links to test cases and data sets.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • orange/Orange/classification/rules.py

    r7383 r7403  
    2323Usage is consistent with typical learner usage in Orange: 
    2424 
     25`rules-cn2.py`_ (uses `titanic.tab`_) 
     26 
    2527.. literalinclude:: code/rules-cn2.py 
    2628    :lines: 7- 
     29 
     30.. _rules-cn2.py: code/rules-cn2.py 
     31.. _titanic.tab: code/titanic.tab 
    2732 
    2833This is the resulting printout:: 
     
    95100in description of classes that follows it: 
    96101 
     102part of `rules-customized.py`_ (uses `titanic.tab`_) 
     103 
    97104.. literalinclude:: code/rules-customized.py 
    98105    :lines: 7-17 
     106 
     107.. _rules-customized.py: code/rules-customized.py 
    99108 
    100109In the example, the rule evaluation function was set to an m-estimate of 
     
    119128set a different validation function and a different beam width. This is simply 
    120129written as: 
     130 
     131part of `rules-customized.py`_ (uses `titanic.tab`_) 
    121132 
    122133.. literalinclude:: code/rules-customized.py 
     
    605616            p = p / (rule.classDistribution.abs + self.m) 
    606617        else: 
    607             p = max(rule.classDistribution)+self.m*apriori[rule.classDistribution.modus()]/apriori.abs 
     618            p = max(rule.classDistribution)+self.m*apriori[rule.\ 
     619                classDistribution.modus()]/apriori.abs 
    608620            p = p / (rule.classDistribution.abs + self.m)       
    609621        return p 
     
    642654            return self 
    643655         
    644     def __init__(self, evaluator = RuleEvaluator_Entropy(), beamWidth = 5, alpha = 1.0, **kwds): 
     656    def __init__(self, evaluator = RuleEvaluator_Entropy(), beamWidth = 5, 
     657        alpha = 1.0, **kwds): 
    645658        self.__dict__.update(kwds) 
    646659        self.ruleFinder = RuleBeamFinder() 
     
    664677    classifier. 
    665678         
    666     :param instance: instance to be classifier 
     679    :param instance: instance to be classified. 
    667680    :type instance: :class:`Orange.data.Instance` 
    668681     
     
    682695        self.__dict__.update(argkw) 
    683696        if instances is not None: 
    684             self.prior = Orange.core.Distribution(instances.domain.classVar, instances) 
     697            self.prior = Orange.core.Distribution(instances.domain.classVar,instances) 
    685698 
    686699    def __call__(self, instance, result_type=Orange.classification.Classifier.GetValue): 
     
    693706                break 
    694707        if not classifier: 
    695             classifier = Orange.core.DefaultClassifier(instance.domain.classVar, self.prior.modus()) 
     708            classifier = Orange.core.DefaultClassifier(instance.domain.classVar,\ 
     709                self.prior.modus()) 
    696710            classifier.defaultDistribution = self.prior 
    697711 
     
    703717 
    704718    def __str__(self): 
    705         retStr = ruleToString(self.rules[0])+" "+str(self.rules[0].classDistribution)+"\n" 
     719        retStr = ruleToString(self.rules[0])+" "+str(self.rules[0].\ 
     720            classDistribution)+"\n" 
    706721        for r in self.rules[1:]: 
    707722            retStr += "ELSE "+ruleToString(r)+" "+str(r.classDistribution)+"\n" 
     
    741756            return self 
    742757             
    743     def __init__(self, evaluator = RuleEvaluator_Laplace(), beamWidth = 5, alpha = 1.0, **kwds): 
     758    def __init__(self, evaluator = RuleEvaluator_Laplace(), beamWidth = 5, 
     759        alpha = 1.0, **kwds): 
    744760        self.__dict__.update(kwds) 
    745761        self.ruleFinder = RuleBeamFinder() 
     
    755771         
    756772        rules = RuleList() 
    757         self.ruleStopping.apriori = Orange.core.Distribution(instances.domain.classVar,instances) 
     773        self.ruleStopping.apriori = Orange.core.Distribution(instances.\ 
     774            domain.classVar,instances) 
    758775        progress=getattr(self,"progressCallback",None) 
    759776        if progress: 
    760777            progress.start = 0.0 
    761778            progress.end = 0.0 
    762             distrib = Orange.core.Distribution(instances.domain.classVar, instances, weight) 
     779            distrib = Orange.core.Distribution(instances.domain.classVar,\ 
     780                instances, weight) 
    763781            distrib.normalize() 
    764782        for targetClass in instances.domain.classVar: 
     
    782800    construct the classifier. 
    783801         
    784     :param instance: instance to be classifier 
     802    :param instance: instance to be classified. 
    785803    :type instance: :class:`Orange.data.Instance` 
    786804    :param result_type: :class:`Orange.classification.Classifier.GetValue` or \ 
     
    886904            return self 
    887905         
    888     def __init__(self, evaluator = WRACCEvaluator(), beamWidth = 5, alpha = 0.05, mult=0.7, **kwds): 
     906    def __init__(self, evaluator = WRACCEvaluator(), beamWidth = 5, 
     907                alpha = 0.05, mult=0.7, **kwds): 
    889908        CN2UnorderedLearnerClass.__init__(self, evaluator = evaluator, 
    890909                                          beamWidth = beamWidth, alpha = alpha, **kwds) 
     
    926945    :type mult: float 
    927946    """ 
    928     def __init__(self, width=5, nsampling=100, rule_sig=1.0, att_sig=1.0, min_coverage = 1., max_rule_complexity = 5.): 
    929         ABCN2.__init__(self, width=width, nsampling=nsampling, rule_sig=rule_sig, att_sig=att_sig, 
    930                        min_coverage=int(min_coverage), max_rule_complexity = int(max_rule_complexity)) 
     947    def __init__(self, width=5, nsampling=100, rule_sig=1.0, att_sig=1.0,\ 
     948        min_coverage = 1., max_rule_complexity = 5.): 
     949        ABCN2.__init__(self, width=width, nsampling=nsampling, 
     950            rule_sig=rule_sig, att_sig=att_sig, min_coverage=int(min_coverage), 
     951            max_rule_complexity = int(max_rule_complexity)) 
    931952 
    932953 
     
    973994    def __init__(self,alpha=.05,min_coverage=0,max_rule_length=0,rules=RuleList()): 
    974995        self.rules = rules 
    975         self.validator = RuleValidator_LRS(alpha=alpha,min_coverage=min_coverage,max_rule_length=max_rule_length) 
     996        self.validator = RuleValidator_LRS(alpha=alpha,\ 
     997            min_coverage=min_coverage,max_rule_length=max_rule_length) 
    976998         
    977999    def __call__(self, rule, data, weightID, targetClass, apriori): 
     
    10361058            weights = Orange.core.newmetaid() 
    10371059            instances.addMetaAttribute(weights,1.) 
    1038             instances.domain.addmeta(weights, Orange.data.feature.Continuous("weights-"+str(weights)), True) 
     1060            instances.domain.addmeta(weights, Orange.data.feature.\ 
     1061                Continuous("weights-"+str(weights)), True) 
    10391062        newWeightsID = Orange.core.newmetaid() 
    10401063        instances.addMetaAttribute(newWeightsID,1.) 
    1041         instances.domain.addmeta(newWeightsID, Orange.data.feature.Continuous("weights-"+str(newWeightsID)), True) 
     1064        instances.domain.addmeta(newWeightsID, Orange.data.feature.\ 
     1065            Continuous("weights-"+str(newWeightsID)), True) 
    10421066        for instance in instances: 
    1043             if rule(instance) and instance.getclass() == rule.classifier(instance,Orange.classification.Classifier.GetValue): 
     1067            if rule(instance) and instance.getclass() == rule.classifier(\ 
     1068                instance,Orange.classification.Classifier.GetValue): 
    10441069                instance[newWeightsID]=instance[weights]*self.mult 
    10451070            else: 
     
    10581083            weights = Orange.core.newmetaid() 
    10591084            instances.addMetaAttribute(weights,1.) 
    1060             instances.domain.addmeta(weights, Orange.data.feature.Continuous("weights-"+str(weights)), True) 
     1085            instances.domain.addmeta(weights, Orange.data.feature.\ 
     1086                Continuous("weights-"+str(weights)), True) 
    10611087        try: 
    10621088            coverage = instances.domain.getmeta("Coverage") 
     
    10671093        newWeightsID = Orange.core.newmetaid() 
    10681094        instances.addMetaAttribute(newWeightsID,1.) 
    1069         instances.domain.addmeta(newWeightsID, Orange.data.feature.Continuous("weights-"+str(newWeightsID)), True) 
     1095        instances.domain.addmeta(newWeightsID, Orange.data.feature.\ 
     1096            Continuous("weights-"+str(newWeightsID)), True) 
    10701097        for instance in instances: 
    1071             if rule(instance) and instance.getclass() == rule.classifier(instance,Orange.classification.Classifier.GetValue): 
     1098            if rule(instance) and instance.getclass() == rule.classifier(instance,\ 
     1099                    Orange.classification.Classifier.GetValue): 
    10721100                try: 
    10731101                    instance[coverage]+=1.0 
     
    10911119        self.probAttribute = Orange.core.newmetaid() 
    10921120        instances.addMetaAttribute(self.probAttribute,-1.e-6) 
    1093         instances.domain.addmeta(self.probAttribute, Orange.data.feature.Continuous("Probs")) 
     1121        instances.domain.addmeta(self.probAttribute, \ 
     1122            Orange.data.feature.Continuous("Probs")) 
    10941123        for instance in instances: 
    10951124##            if targetClass<0 or (instance.getclass() == targetClass): 
     
    11031132                bestRules.append(r) 
    11041133        for r_i,r in enumerate(self.bestRule): 
    1105             if r and not rule_in_set(r,bestRules) and instances[r_i].getclass()==r.classifier.defaultValue: 
     1134            if r and not rule_in_set(r,bestRules) and instances[r_i].\ 
     1135                getclass()==r.classifier.defaultValue: 
    11061136                bestRules.append(r) 
    11071137        return bestRules 
     
    11681198            ret += " AND " 
    11691199        if type(c) == Orange.core.ValueFilter_discrete: 
    1170             ret += domain[c.position].name + "=" + str([domain[c.position].values[int(v)] for v in c.values]) 
     1200            ret += domain[c.position].name + "=" + str([domain[c.position].\ 
     1201                values[int(v)] for v in c.values]) 
    11711202        elif type(c) == Orange.core.ValueFilter_continuous: 
    11721203            ret += domain[c.position].name + selectSign(c.oper) + str(c.ref) 
    1173     if rule.classifier and type(rule.classifier) == Orange.core.DefaultClassifier and rule.classifier.defaultVal: 
     1204    if rule.classifier and type(rule.classifier) == Orange.core.DefaultClassifier\ 
     1205            and rule.classifier.defaultVal: 
    11741206        ret = ret + " THEN "+domain.classVar.name+"="+\ 
    11751207        str(rule.classifier.defaultValue) 
    11761208        if showDistribution: 
    11771209            ret += str(rule.classDistribution) 
    1178     elif rule.classifier and type(rule.classifier) == Orange.core.DefaultClassifier and type(domain.classVar) == Orange.core.EnumVariable: 
     1210    elif rule.classifier and type(rule.classifier) == Orange.core.DefaultClassifier\ 
     1211            and type(domain.classVar) == Orange.core.EnumVariable: 
    11791212        ret = ret + " THEN "+domain.classVar.name+"="+\ 
    11801213        str(rule.classDistribution.modus()) 
     
    12701303 
    12711304def computeDists(data, weight=0, targetClass=0, N=100, learner=None): 
    1272     """ Compute distributions of likelihood ratio statistics of extreme (best) rules.  """ 
     1305    """ Compute distributions of likelihood ratio statistics of extreme (best) rules.""" 
    12731306    if not learner: 
    12741307        learner = createLearner() 
     
    13281361                oldREP = learner.ruleFinder.evaluator.returnExpectedProb 
    13291362                learner.ruleFinder.evaluator.returnExpectedProb = False 
    1330                 learner.ruleFinder.evaluator.evDistGetter.dists = createEVDistList(dists[int(r.classifier.defaultVal)]) 
    1331                 tmpRule.quality = learner.ruleFinder.evaluator(tmpRule,instances,weight,r.classifier.defaultVal,apriori) 
     1363                learner.ruleFinder.evaluator.evDistGetter.dists = createEVDistList(\ 
     1364                        dists[int(r.classifier.defaultVal)]) 
     1365                tmpRule.quality = learner.ruleFinder.evaluator(tmpRule, 
     1366                        instances,weight,r.classifier.defaultVal,apriori) 
    13321367                learner.ruleFinder.evaluator.returnExpectedProb = oldREP 
    13331368                # if rule not in rules already, add it to the list 
    1334                 if not True in [rules_equal(ri,tmpRule) for ri in newRules] and len(tmpRule.filter.conditions)>0 and tmpRule.quality > apriori[r.classifier.defaultVal]/apriori.abs: 
     1369                if not True in [rules_equal(ri,tmpRule) for ri in newRules] and\ 
     1370                        len(tmpRule.filter.conditions)>0 and tmpRule.quality >\ 
     1371                            apriori[r.classifier.defaultVal]/apriori.abs: 
    13351372                    newRules.append(tmpRule) 
    13361373                # create new tmpRules, set parent Rule, append them to tmpList2 
Note: See TracChangeset for help on using the changeset viewer.