Changeset 7230:13f6e74cb3b7 in orange


Ignore:
Timestamp:
02/02/11 19:58:36 (3 years ago)
Author:
matija <matija.polajnar@…>
Branch:
default
Convert:
2dbf67e4097ab3fea846708b0a5a5629f58817e5
Message:

Continuation of refactoring and documentation; still unfinished.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • orange/Orange/classification/rules.py

    r7223 r7230  
    1717 
    1818All variations of CN2 are implemented by wrapping 
    19 :class:`Orange.core.RuleLearner` class. Each CN2 learner class in this module 
    20 changes some of RuleLearner's replaceable components to reflect the required 
    21 behaviour. Thus, in the description of each class, we mention only components 
    22 that differ from default values. 
     19:class:`Orange.classification.rules.RuleLearner` class. Each CN2 learner class 
     20in this module changes some of RuleLearner's replaceable components to reflect 
     21the required behaviour. Thus, in the description of each class, we mention only 
     22components that differ from default values. 
    2323 
    2424.. autoclass:: Orange.classification.rules.CN2Learner 
     
    2626   :show-inheritance: 
    2727   :undoc-members: 
     28    
     29.. autoclass:: Orange.classification.rules.CN2Classifier 
     30   :members: 
     31   :show-inheritance: 
    2832    
    2933.. index:: Unordered CN2 
     
    3337   :show-inheritance: 
    3438   :undoc-members: 
     39    
     40.. autoclass:: Orange.classification.rules.CN2UnorderedClassifier 
     41   :members: 
     42   :show-inheritance: 
    3543    
    3644.. index:: CN2-SD 
     
    96104    RuleValidator_LRS 
    97105 
    98  
    99  
    100  
    101 ################################################################################ 
    102 # Following is a copy&paste of orngCN2 ...                                     # 
    103 ################################################################################ 
    104  
    105  
    106106import Orange.core 
    107 import random, math 
     107import random 
     108import math 
    108109from orngABCN2 import ABCN2 
    109110 
    110 def ruleToString(rule, showDistribution = True): 
    111     def selectSign(oper): 
    112         if oper == Orange.core.ValueFilter_continuous.Less: 
    113             return "<" 
    114         elif oper == Orange.core.ValueFilter_continuous.LessEqual: 
    115             return "<=" 
    116         elif oper == Orange.core.ValueFilter_continuous.Greater: 
    117             return ">" 
    118         elif oper == Orange.core.ValueFilter_continuous.GreaterEqual: 
    119             return ">=" 
    120         else: return "=" 
    121  
    122     if not rule: 
    123         return "None" 
    124     conds = rule.filter.conditions 
    125     domain = rule.filter.domain 
    126      
    127     ret = "IF " 
    128     if len(conds)==0: 
    129         ret = ret + "TRUE" 
    130  
    131     for i,c in enumerate(conds): 
    132         if i > 0: 
    133             ret += " AND " 
    134         if type(c) == Orange.core.ValueFilter_discrete: 
    135             ret += domain[c.position].name + "=" + str([domain[c.position].values[int(v)] for v in c.values]) 
    136         elif type(c) == Orange.core.ValueFilter_continuous: 
    137             ret += domain[c.position].name + selectSign(c.oper) + str(c.ref) 
    138     if rule.classifier and type(rule.classifier) == Orange.core.DefaultClassifier and rule.classifier.defaultVal: 
    139         ret = ret + " THEN "+domain.classVar.name+"="+\ 
    140         str(rule.classifier.defaultValue) 
    141         if showDistribution: 
    142             ret += str(rule.classDistribution) 
    143     elif rule.classifier and type(rule.classifier) == Orange.core.DefaultClassifier and type(domain.classVar) == Orange.core.EnumVariable: 
    144         ret = ret + " THEN "+domain.classVar.name+"="+\ 
    145         str(rule.classDistribution.modus()) 
    146         if showDistribution: 
    147             ret += str(rule.classDistribution) 
    148     return ret         
    149111 
    150112class LaplaceEvaluator(RuleEvaluator): 
     
    160122        else: 
    161123            return (max(rule.classDistribution)+1)/(sumDist+len(data.domain.classVar.values)) 
     124 
    162125 
    163126class WRACCEvaluator(RuleEvaluator): 
     
    181144        else: return (pTruePositive-pClass)/max(pRule,1e-6) 
    182145 
    183 class mEstimate(RuleEvaluator): 
    184     def __init__(self, m=2): 
    185         self.m = m 
    186     def __call__(self, rule, data, weightID, targetClass, apriori): 
    187         if not rule.classDistribution: 
    188             return 0. 
    189         sumDist = rule.classDistribution.abs 
    190         if self.m == 0 and not sumDist: 
    191             return 0. 
    192         # get distribution 
    193         if targetClass>-1: 
    194             p = rule.classDistribution[targetClass]+self.m*apriori[targetClass]/apriori.abs 
    195             p = p / (rule.classDistribution.abs + self.m) 
    196         else: 
    197             p = max(rule.classDistribution)+self.m*apriori[rule.classDistribution.modus()]/apriori.abs 
    198             p = p / (rule.classDistribution.abs + self.m)       
    199         return p 
    200  
    201 class RuleStopping_apriori(RuleStoppingCriteria): 
    202     def __init__(self, apriori=None): 
    203         self.apriori =  None 
    204          
    205     def __call__(self,rules,rule,examples,data): 
    206         if not self.apriori: 
    207             return False 
    208         if not type(rule.classifier) == Orange.core.DefaultClassifier: 
    209             return False 
    210         ruleAcc = rule.classDistribution[rule.classifier.defaultVal]/rule.classDistribution.abs 
    211         aprioriAcc = self.apriori[rule.classifier.defaultVal]/self.apriori.abs 
    212         if ruleAcc>aprioriAcc: 
    213             return False 
    214         return True 
    215  
    216 class LengthValidator(RuleValidator): 
    217     """ prune rules with more conditions than self.length. """ 
    218     def __init__(self, length=-1): 
    219         self.length = length 
    220          
    221     def __call__(self, rule, data, weightID, targetClass, apriori): 
    222         if self.length >= 0: 
    223             return len(rule.filter.conditions) <= self.length 
    224         return True     
    225      
    226  
    227 def supervisedClassCheck(examples): 
    228     if not examples.domain.classVar: 
    229         raise Exception("Class variable is required!") 
    230     if examples.domain.classVar.varType == Orange.core.VarTypes.Continuous: 
    231         raise Exception("CN2 requires a discrete class!") 
    232      
     146 
    233147class CN2Learner(RuleLearner): 
     148    """ 
     149    Classical CN2 (see Clark and Niblett; 1988). It learns a set of ordered 
     150    rules, which means that the classifier must try these rules in the same 
     151    order as they were learned. 
     152     
     153    """ 
     154     
    234155    def __new__(cls, examples=None, weightID=0, **kwargs): 
     156        """ 
     157        :param examples: Data instances to learn from. If not None, an 
     158            :class:`Orange.classification.rules.CN2Classifier` is returned.  
     159        :type examples: :class:`Orange.data.Table` or None 
     160        :param weightID: ID number of weight attribute, default 0 
     161        :type weightID: integer 
     162        :rtype: :class:`Orange.classification.rules.CN2Learner` or 
     163            :class:`Orange.classification.rules.CN2Classifier` 
     164         
     165        Other named parameters may be passed as defined by the ancestor class. 
     166         
     167        """ 
    235168        self = RuleLearner.__new__(cls, **kwargs) 
    236169        if examples is not None: 
     
    241174         
    242175    def __init__(self, evaluator = RuleEvaluator_Entropy(), beamWidth = 5, alpha = 1.0, **kwds): 
     176        """ 
     177        :param evaluator:   
     178        :type evaluator: :class:`Orange.classification.rules.RuleEvaluator` 
     179        :param beamWidth:  
     180        :type beamWidth: integer 
     181        :param alpha: 
     182        :type alpha: float 
     183        :rtype: :class:`Orange.classification.rules.CN2Learner` 
     184         
     185        Other named parameters may be passed as defined by the ancestor class. 
     186         
     187        """ 
    243188        self.__dict__.update(kwds) 
    244189        self.ruleFinder = RuleBeamFinder() 
     
    248193         
    249194    def __call__(self, examples, weight=0): 
     195        """ 
     196        :param examples: Data instances to learn from.  
     197        :type examples: :class:`Orange.data.Table` 
     198        :param weight: ID number of weight attribute, default 0 
     199        :type weight: integer 
     200        :rtype: :class:`Orange.classification.rules.CN2Classifier` 
     201         
     202        Learns from the given table of data instances. 
     203         
     204        """ 
    250205        supervisedClassCheck(examples) 
    251206         
     
    254209        return CN2Classifier(rules, examples, weight) 
    255210 
     211 
    256212class CN2Classifier(RuleClassifier): 
     213    """ 
     214    Classical CN2 (see Clark and Niblett; 1988). Classifies using an ordered 
     215    set of rules. Usually the learner 
     216    (:class:`Orange.classification.rules.CN2Learner`) is used to construct the 
     217    classifier. 
     218     
     219    """ 
    257220    def __init__(self, rules=None, examples=None, weightID = 0, **argkw): 
    258221        self.rules = rules 
     
    289252 
    290253 
    291 # How do I set this up in C++ so that I do not need to add the name?
    292254class CN2UnorderedLearner(RuleLearner): 
     255    """ 
     256    CN2 unordered (see Clark and Boswell; 1991). It learns a set of unordered 
     257    rules - classification from rules does not assume ordering of rules - and 
     258    returns an :class:`Orange.classification.rules.CN2UnorderedClassifier`. 
     259    Learning is quite similar to classical CN2, except that the process is 
     260    split into learning rules for each class separately, which is implemented 
     261    in the class's __call__ function. Learning of rules for each class uses a 
     262    slightly changed version of the classical CN2 algorithm. 
     263     
     264    """ 
    293265    def __new__(cls, examples=None, weightID=0, **kwargs): 
     266        """ 
     267        :param examples: Data instances to learn from. If not None, an 
     268            :class:`Orange.classification.rules.CN2UnorderedClassifier` is 
     269            returned.  
     270        :type examples: :class:`Orange.data.Table` or None 
     271        :param weightID: ID number of weight attribute, default 0 
     272        :type weightID: integer 
     273        :rtype: :class:`Orange.classification.rules.CN2UnorderedLearner` or 
     274            :class:`Orange.classification.rules.CN2UnorderedClassifier` 
     275         
     276        Other named parameters may be passed as defined by the ancestor class. 
     277         
     278        """ 
    294279        self = RuleLearner.__new__(cls, **kwargs) 
    295280        if examples is not None: 
     
    388373            retStr += ruleToString(r)+" "+str(r.classDistribution)+"\n" 
    389374        return retStr 
     375 
     376 
     377class CN2SDUnorderedLearner(CN2UnorderedLearner): 
     378    """ 
     379    CN2-SD (see Lavrac et al.; 2004). It learns a set of unordered rules in the 
     380    same way as :class:`Orange.classification.rules.CN2UnorderedLearner`. 
     381    The difference between classical unordered CN2 and CN2-SD is the choice of 
     382    a specific evaluation function and covering function, as mentioned in the 
     383    description of the 'mult' parameter of the __init__ function. 
     384     
     385    """ 
     386    def __new__(cls, examples=None, weightID=0, **kwargs): 
     387        """ 
     388        :param examples: Data instances to learn from. If not None, an 
     389            :class:`Orange.classification.rules.CN2UnorderedClassifier` is 
     390            returned.  
     391        :type examples: :class:`Orange.data.Table` or None 
     392        :param weightID: ID number of weight attribute, default 0 
     393        :type weightID: integer 
     394        :rtype: :class:`Orange.classification.rules.CN2SDUnorderedLearner` or 
     395            :class:`Orange.classification.rules.CN2UnorderedClassifier` 
     396         
     397        Other named parameters may be passed as defined by the ancestor class. 
     398         
     399        """ 
     400        self = CN2UnorderedLearner.__new__(cls, **kwargs) 
     401        if examples is not None: 
     402            self.__init__(**kwargs) 
     403            return self.__call__(examples, weightID) 
     404        else: 
     405            return self 
     406         
     407    def __init__(self, evaluator = WRACCEvaluator(), beamWidth = 5, alpha = 0.05, mult=0.7, **kwds): 
     408        CN2UnorderedLearnerClass.__init__(self, evaluator = evaluator, 
     409                                          beamWidth = beamWidth, alpha = alpha, **kwds) 
     410        self.coverAndRemove = CovererAndRemover_multWeights(mult=mult) 
     411 
     412    def __call__(self, examples, weight=0):         
     413        supervisedClassCheck(examples) 
     414         
     415        oldExamples = Orange.core.ExampleTable(examples) 
     416        classifier = CN2UnorderedLearnerClass.__call__(self,examples,weight) 
     417        for r in classifier.rules: 
     418            r.filterAndStore(oldExamples,weight,r.classifier.defaultVal) 
     419        return classifier 
     420 
     421 
     422def ruleToString(rule, showDistribution = True): 
     423    def selectSign(oper): 
     424        if oper == Orange.core.ValueFilter_continuous.Less: 
     425            return "<" 
     426        elif oper == Orange.core.ValueFilter_continuous.LessEqual: 
     427            return "<=" 
     428        elif oper == Orange.core.ValueFilter_continuous.Greater: 
     429            return ">" 
     430        elif oper == Orange.core.ValueFilter_continuous.GreaterEqual: 
     431            return ">=" 
     432        else: return "=" 
     433 
     434    if not rule: 
     435        return "None" 
     436    conds = rule.filter.conditions 
     437    domain = rule.filter.domain 
     438     
     439    ret = "IF " 
     440    if len(conds)==0: 
     441        ret = ret + "TRUE" 
     442 
     443    for i,c in enumerate(conds): 
     444        if i > 0: 
     445            ret += " AND " 
     446        if type(c) == Orange.core.ValueFilter_discrete: 
     447            ret += domain[c.position].name + "=" + str([domain[c.position].values[int(v)] for v in c.values]) 
     448        elif type(c) == Orange.core.ValueFilter_continuous: 
     449            ret += domain[c.position].name + selectSign(c.oper) + str(c.ref) 
     450    if rule.classifier and type(rule.classifier) == Orange.core.DefaultClassifier and rule.classifier.defaultVal: 
     451        ret = ret + " THEN "+domain.classVar.name+"="+\ 
     452        str(rule.classifier.defaultValue) 
     453        if showDistribution: 
     454            ret += str(rule.classDistribution) 
     455    elif rule.classifier and type(rule.classifier) == Orange.core.DefaultClassifier and type(domain.classVar) == Orange.core.EnumVariable: 
     456        ret = ret + " THEN "+domain.classVar.name+"="+\ 
     457        str(rule.classDistribution.modus()) 
     458        if showDistribution: 
     459            ret += str(rule.classDistribution) 
     460    return ret         
     461 
     462 
     463class mEstimate(RuleEvaluator): 
     464    def __init__(self, m=2): 
     465        self.m = m 
     466    def __call__(self, rule, data, weightID, targetClass, apriori): 
     467        if not rule.classDistribution: 
     468            return 0. 
     469        sumDist = rule.classDistribution.abs 
     470        if self.m == 0 and not sumDist: 
     471            return 0. 
     472        # get distribution 
     473        if targetClass>-1: 
     474            p = rule.classDistribution[targetClass]+self.m*apriori[targetClass]/apriori.abs 
     475            p = p / (rule.classDistribution.abs + self.m) 
     476        else: 
     477            p = max(rule.classDistribution)+self.m*apriori[rule.classDistribution.modus()]/apriori.abs 
     478            p = p / (rule.classDistribution.abs + self.m)       
     479        return p 
     480 
     481class RuleStopping_apriori(RuleStoppingCriteria): 
     482    def __init__(self, apriori=None): 
     483        self.apriori =  None 
     484         
     485    def __call__(self,rules,rule,examples,data): 
     486        if not self.apriori: 
     487            return False 
     488        if not type(rule.classifier) == Orange.core.DefaultClassifier: 
     489            return False 
     490        ruleAcc = rule.classDistribution[rule.classifier.defaultVal]/rule.classDistribution.abs 
     491        aprioriAcc = self.apriori[rule.classifier.defaultVal]/self.apriori.abs 
     492        if ruleAcc>aprioriAcc: 
     493            return False 
     494        return True 
     495 
     496class LengthValidator(RuleValidator): 
     497    """ prune rules with more conditions than self.length. """ 
     498    def __init__(self, length=-1): 
     499        self.length = length 
     500         
     501    def __call__(self, rule, data, weightID, targetClass, apriori): 
     502        if self.length >= 0: 
     503            return len(rule.filter.conditions) <= self.length 
     504        return True     
     505     
     506 
     507def supervisedClassCheck(examples): 
     508    if not examples.domain.classVar: 
     509        raise Exception("Class variable is required!") 
     510    if examples.domain.classVar.varType == Orange.core.VarTypes.Continuous: 
     511        raise Exception("CN2 requires a discrete class!") 
     512     
     513 
     514 
     515 
    390516 
    391517class RuleClassifier_bestRule(RuleClassifier): 
     
    521647            self.validator.rules.append(rule) 
    522648        return bool(ru_st) 
    523 # 
    524 #def CN2SDUnorderedLearner(examples = None, weightID=0, **kwds): 
    525 #    cn2 = CN2SDUnorderedLearnerClass(**kwds) 
    526 #    if examples: 
    527 #        return cn2(examples, weightID) 
    528 #    else: 
    529 #        return cn2 
    530      
    531 class CN2SDUnorderedLearner(CN2UnorderedLearner): 
    532     def __new__(cls, examples=None, weightID=0, **kwargs): 
    533         self = CN2UnorderedLearner.__new__(cls, **kwargs) 
    534         if examples is not None: 
    535             self.__init__(**kwargs) 
    536             return self.__call__(examples, weightID) 
    537         else: 
    538             return self 
    539          
    540     def __init__(self, evaluator = WRACCEvaluator(), beamWidth = 5, alpha = 0.05, mult=0.7, **kwds): 
    541         CN2UnorderedLearnerClass.__init__(self, evaluator = evaluator, 
    542                                           beamWidth = beamWidth, alpha = alpha, **kwds) 
    543         self.coverAndRemove = CovererAndRemover_multWeights(mult=mult) 
    544  
    545     def __call__(self, examples, weight=0):         
    546         supervisedClassCheck(examples) 
    547          
    548         oldExamples = Orange.core.ExampleTable(examples) 
    549         classifier = CN2UnorderedLearnerClass.__call__(self,examples,weight) 
    550         for r in classifier.rules: 
    551             r.filterAndStore(oldExamples,weight,r.classifier.defaultVal) 
    552         return classifier 
     649     
    553650 
    554651# Miscellaneous - utility functions 
Note: See TracChangeset for help on using the changeset viewer.