Timestamp:
03/15/11 16:24:04
Author:
jzbontar <jure.zbontar@…>
Branch:
default
Convert:
3d018f9c28d06302ffc546818d8c4952df770d2c
Message:

Rename camel case identifiers in logistic regression.

File:
1 edited

Legend:

  ' ' unmodified
  '+' added
  '-' removed
  • orange/Orange/classification/logreg.py

--- orange/Orange/classification/logreg.py (r7583)
+++ orange/Orange/classification/logreg.py (r7747)
@@ -8,18 +8,16 @@
 *******************
 
-Implements `logistic regression <http://en.wikipedia.org/wiki/Logistic_regression>`_
-with an extension for proper treatment of discrete features.
-The algorithm can handle various anomalies in features, such as constant variables
-and singularities, that could make fitting of logistic regression almost
-impossible. Stepwise logistic regression, which iteratively selects the most informative features,
-is also supported.
-
-
-.. autofunction:: LogRegLearner
-.. autofunction:: StepWiseFSS
+Implements `logistic regression
+<http://en.wikipedia.org/wiki/Logistic_regression>`_ with an extension for
+proper treatment of discrete features.  The algorithm can handle various
+anomalies in features, such as constant variables and singularities, that
+could make fitting of logistic regression almost impossible. Stepwise
+logistic regression, which iteratively selects the most informative
+features, is also supported.
+
+
+.. autoclass:: LogRegLearner
+.. autoclass:: StepWiseFSS
 .. autofunction:: dump
-
-.. autoclass:: StepWiseFSS_class
-   :members:
 
 Examples
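Taken together, the renamed API reads as below; a minimal usage sketch (assumed, not part of the changeset: the dataset name and the exact import path follow Orange 2.x conventions; Python 2, as in the module itself):

    import Orange
    from Orange.classification.logreg import LogRegLearner, dump

    # 'titanic' stands in for any discrete-class dataset shipped with Orange
    data = Orange.data.Table("titanic")
    classifier = LogRegLearner(data)   # constructing with data trains at once
    print dump(classifier)             # dump() now returns the summary as a string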
     
@@ -144,5 +142,5 @@
 """
 
-from Orange.core import LogRegLearner, LogRegClassifier, LogRegFitter, LogRegFitter_Cholesky
+#from Orange.core import LogRegLearner, LogRegClassifier, LogRegFitter, LogRegFitter_Cholesky
 
 import Orange
     
@@ -157,5 +155,5 @@
 
 def dump(classifier):
-    """ Formatted print to console of all major features in logistic
+    """ Formatted string of all major features in logistic
     regression classifier.
 
     
@@ -164,8 +162,8 @@
 
     # print out class values
-    print
-    print "class attribute = " + classifier.domain.classVar.name
-    print "class values = " + str(classifier.domain.classVar.values)
-    print
+    out = ['']
+    out.append("class attribute = " + classifier.domain.classVar.name)
+    out.append("class values = " + str(classifier.domain.classVar.values))
+    out.append('')
 
     # get the longest attribute name
     
@@ -177,14 +175,16 @@
     # print out the head
     formatstr = "%"+str(longest)+"s %10s %10s %10s %10s %10s"
-    print formatstr % ("Feature", "beta", "st. error", "wald Z", "P", "OR=exp(beta)")
-    print
+    out.append(formatstr % ("Feature", "beta", "st. error", "wald Z", "P", "OR=exp(beta)"))
+    out.append('')
     formatstr = "%"+str(longest)+"s %10.2f %10.2f %10.2f %10.2f"
-    print formatstr % ("Intercept", classifier.beta[0], classifier.beta_se[0], classifier.wald_Z[0], classifier.P[0])
+    out.append(formatstr % ("Intercept", classifier.beta[0], classifier.beta_se[0], classifier.wald_Z[0], classifier.P[0]))
     formatstr = "%"+str(longest)+"s %10.2f %10.2f %10.2f %10.2f %10.2f"
     for i in range(len(classifier.continuizedDomain.attributes)):
-        print formatstr % (classifier.continuizedDomain.attributes[i].name, classifier.beta[i+1], classifier.beta_se[i+1], classifier.wald_Z[i+1], abs(classifier.P[i+1]), math.exp(classifier.beta[i+1]))
+        out.append(formatstr % (classifier.continuizedDomain.attributes[i].name, classifier.beta[i+1], classifier.beta_se[i+1], classifier.wald_Z[i+1], abs(classifier.P[i+1]), math.exp(classifier.beta[i+1])))
+
+    return '\n'.join(out)
 
 
-def hasDiscreteValues(domain):
+def has_discrete_values(domain):
     for at in domain.attributes:
         if at.varType == Orange.core.VarTypes.Discrete:
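The `dump` rewrite is behavioural, not just cosmetic: the function now builds the report in `out` and returns `'\n'.join(out)` instead of printing. A sketch of the adjustment callers would need (assuming a fitted `classifier`, as in the sketch above):

    # before r7747: dump(classifier) wrote the table to stdout itself
    # from r7747 on: it returns a string, so the caller decides what to do
    report = dump(classifier)
    print report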
     
@@ -192,5 +192,5 @@
     return 0
 
-def LogRegLearner(table=None, weightID=0, **kwds):
+class LogRegLearner(Orange.classification.Learner):
     """ Logistic regression learner.
 
     
@@ -206,5 +206,4 @@
     :type removeSingular: bool
     :param fitter: the fitting algorithm (by default the Newton-Raphson fitting algorithm is used)
-    :type fitter: type???
     :param stepwiseLR: set to 1 if you wish to use stepwise logistic regression
     :type stepwiseLR: bool
     
@@ -215,14 +214,15 @@
     :param numFeatures: parameter for stepwise feature selection
     :type numFeatures: int
-    :rtype: :obj:`LogRegLearnerClass` or :obj:`LogRegClassifier`
+    :rtype: :obj:`LogRegLearner` or :obj:`LogRegClassifier`
 
     """
-    lr = LogRegLearnerClass(**kwds)
-    if table:
-        return lr(table, weightID)
-    else:
-        return lr
-
-class LogRegLearnerClass(Orange.core.Learner):
+    def __new__(cls, instances=None, weightID=0, **argkw):
+        self = Orange.classification.Learner.__new__(cls, **argkw)
+        if instances:
+            self.__init__(**argkw)
+            return self.__call__(instances, weightID)
+        else:
+            return self
+
     def __init__(self, removeSingular=0, fitter = None, **kwds):
         self.__dict__.update(kwds)
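The factory function is folded into the class here: `__new__` keeps both calling conventions alive. A sketch of the two paths (hedged: `data` is assumed to be an `Orange.data.Table`):

    learner = LogRegLearner(removeSingular=1)   # no data: an untrained learner
    classifier = learner(data)                  # ...trained on demand
    classifier = LogRegLearner(data)            # data up front: __new__ calls the
                                                # learner and returns the classifier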
     
@@ -273,12 +273,13 @@
 
 
-def Univariate_LogRegLearner(examples=None, **kwds):
-    learner = apply(Univariate_LogRegLearner_Class, (), kwds)
-    if examples:
-        return learner(examples)
-    else:
-        return learner
-
-class Univariate_LogRegLearner_Class(Orange.core.Learner):
+class UnivariateLogRegLearner(Orange.classification.Learner):
+    def __new__(cls, instances=None, **argkw):
+        self = Orange.classification.Learner.__new__(cls, **argkw)
+        if instances:
+            self.__init__(**argkw)
+            return self.__call__(instances)
+        else:
+            return self
+
     def __init__(self, **kwds):
         self.__dict__.update(kwds)
     
@@ -296,5 +297,5 @@
-        return Univariate_LogRegClassifier(beta = beta, beta_se = beta_se, P = P, wald_Z = wald_Z, domain = domain)
+        return UnivariateLogRegClassifier(beta = beta, beta_se = beta_se, P = P, wald_Z = wald_Z, domain = domain)
 
-class Univariate_LogRegClassifier(Orange.core.Classifier):
+class UnivariateLogRegClassifier(Orange.core.Classifier):
     def __init__(self, **kwds):
         self.__dict__.update(kwds)
     
@@ -305,12 +306,13 @@
     
 
-def LogRegLearner_getPriors(examples = None, weightID=0, **kwds):
-    lr = LogRegLearnerClass_getPriors(**kwds)
-    if examples:
-        return lr(examples, weightID)
-    else:
-        return lr
-
-class LogRegLearnerClass_getPriors(object):
+class LogRegLearnerGetPriors(object):
+    def __new__(cls, instances=None, weightID=0, **argkw):
+        self = object.__new__(cls, **argkw)
+        if instances:
+            self.__init__(**argkw)
+            return self.__call__(instances, weightID)
+        else:
+            return self
+
     def __init__(self, removeSingular=0, **kwds):
         self.__dict__.update(kwds)
     
@@ -423,5 +425,5 @@
         #return (bayes_prior,orig_model.beta[examples.domain.classVar],logistic_prior)
 
-class LogRegLearnerClass_getPriors_OneTable:
+class LogRegLearnerGetPriorsOneTable:
     def __init__(self, removeSingular=0, **kwds):
         self.__dict__.update(kwds)
     
@@ -524,5 +526,5 @@
 ######################################
 
-def Pr(x, betas):
+def pr(x, betas):
     k = math.exp(dot(x, betas))
     return k / (1+k)
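For reference, the renamed `pr` is the standard logistic link; what the two lines compute, written out:

    P(y = 1 \mid x) = \frac{e^{x^{\top}\beta}}{1 + e^{x^{\top}\beta}}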
     
@@ -531,5 +533,5 @@
     llh = 0.0
     for i,x_i in enumerate(x):
-        pr = Pr(x_i,betas)
-        llh += y[i]*log(max(pr,1e-6)) + (1-y[i])*log(max(1-pr,1e-6))
+        p = pr(x_i,betas)
+        llh += y[i]*log(max(p,1e-6)) + (1-y[i])*log(max(1-p,1e-6))
     return llh
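This loop accumulates the Bernoulli log-likelihood, clamped away from log(0); the local result is bound to `p` rather than reusing the name `pr`, which after the rename refers to the function being called (rebinding it would raise UnboundLocalError). In math form:

    \ell(\beta) = \sum_{i} \left[ y_i \log \max(p_i, 10^{-6}) + (1 - y_i) \log \max(1 - p_i, 10^{-6}) \right],
    \qquad p_i = P(y_i = 1 \mid x_i)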
     
@@ -542,5 +544,5 @@
     return mat
     
-class simpleFitter(Orange.core.LogRegFitter):
+class SimpleFitter(Orange.core.LogRegFitter):
     def __init__(self, penalty=0, se_penalty = False):
         self.penalty = penalty
     
@@ -566,5 +568,5 @@
         pen_matrix = array([self.penalty] * (len(data.domain.attributes)+1))
         if self.se_penalty:
-            p = array([Pr(X[i], betas) for i in range(len(data))])
+            p = array([pr(X[i], betas) for i in range(len(data))])
             W = identity(len(data), Float)
             pp = p * (1.0-p)
     
@@ -581,5 +583,5 @@
             likelihood = likelihood_new
             oldBetas = betas
-            p = array([Pr(X[i], betas) for i in range(len(data))])
+            p = array([pr(X[i], betas) for i in range(len(data))])
 
             W = identity(len(data), Float)
     
@@ -604,8 +606,8 @@
             
 ##        XX = sqrt(diagonal(inverse(matrixmultiply(transpose(X),X))))
-##        yhat = array([Pr(X[i], betas) for i in range(len(data))])
+##        yhat = array([pr(X[i], betas) for i in range(len(data))])
 ##        ss = sum((y - yhat) ** 2) / (N - len(data.domain.attributes) - 1)
 ##        sigma = math.sqrt(ss)
-        p = array([Pr(X[i], betas) for i in range(len(data))])
+        p = array([pr(X[i], betas) for i in range(len(data))])
         W = identity(len(data), Float)
         pp = p * (1.0-p)
     
@@ -622,5 +624,5 @@
         return (self.OK, beta, beta_se, 0)
 
-def Pr_bx(bx):
+def pr_bx(bx):
     if bx > 35:
         return 1
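`pr_bx` short-circuits the logistic for extreme inputs: beyond bx = 35 the exact value differs from 1 by roughly 6e-16, i.e. at the level of double-precision rounding, so the clamp changes nothing observable. A quick check one could run (hypothetical snippet, not part of the changeset):

    from math import exp
    bx = 35.0
    print 1.0 - exp(bx) / (1 + exp(bx))   # on the order of 1e-16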
     
@@ -629,5 +631,5 @@
     return exp(bx)/(1+exp(bx))
 
-class bayesianFitter(Orange.core.LogRegFitter):
+class BayesianFitter(Orange.core.LogRegFitter):
     def __init__(self, penalty=0, anch_examples=[], tau = 0):
         self.penalty = penalty
     
@@ -635,5 +637,5 @@
         self.tau = tau
 
-    def createArrayData(self,data):
+    def create_array_data(self,data):
         if not len(data):
             return (array([]),array([]))
     
@@ -653,14 +655,14 @@
     
     def __call__(self, data, weight=0):
-        (X,y)=self.createArrayData(data)
+        (X,y)=self.create_array_data(data)
 
         exTable = Orange.core.ExampleTable(data.domain)
         for id,ex in self.anch_examples:
             exTable.extend(Orange.core.ExampleTable(ex,data.domain))
-        (X_anch,y_anch)=self.createArrayData(exTable)
+        (X_anch,y_anch)=self.create_array_data(exTable)
 
         betas = array([0.0] * (len(data.domain.attributes)+1))
 
-        likelihood,betas = self.estimateBeta(X,y,betas,[0]*(len(betas)),X_anch,y_anch)
+        likelihood,betas = self.estimate_beta(X,y,betas,[0]*(len(betas)),X_anch,y_anch)
 
         # get attribute groups atGroup = [(startIndex, number of values), ...)
     
@@ -681,5 +683,5 @@
 ##            print "1", concatenate((betas[:i+1],betas[i+2:]))
 ##            print "2", betas
-            likelihood_temp,betas_temp=self.estimateBeta(X_temp,y,concatenate((betas[:ag[0]+1],betas[ag[0]+ag[1]+1:])),[0]+[1]*(len(betas)-1-ag[1]),X_anch_temp,y_anch)
+            likelihood_temp,betas_temp=self.estimate_beta(X_temp,y,concatenate((betas[:ag[0]+1],betas[ag[0]+ag[1]+1:])),[0]+[1]*(len(betas)-1-ag[1]),X_anch_temp,y_anch)
             print "finBetas", betas, betas_temp
             print "betas", betas[0], betas_temp[0]
     
@@ -700,9 +702,9 @@
     
        
-    def estimateBeta(self,X,y,betas,const_betas,X_anch,y_anch):
+    def estimate_beta(self,X,y,betas,const_betas,X_anch,y_anch):
         N,N_anch = len(y),len(y_anch)
         r,r_anch = array([dot(X[i], betas) for i in range(N)]),\
                    array([dot(X_anch[i], betas) for i in range(N_anch)])
-        p    = array([Pr_bx(ri) for ri in r])
+        p    = array([pr_bx(ri) for ri in r])
         X_sq = X*X
 
     
@@ -718,9 +720,9 @@
                 dl = matrixmultiply(X[:,j],transpose(y-p))
                 for xi,x in enumerate(X_anch):
-                    dl += self.penalty*x[j]*(y_anch[xi] - Pr_bx(r_anch[xi]*self.penalty))
+                    dl += self.penalty*x[j]*(y_anch[xi] - pr_bx(r_anch[xi]*self.penalty))
 
                 ddl = matrixmultiply(X_sq[:,j],transpose(p*(1-p)))
                 for xi,x in enumerate(X_anch):
-                    ddl += self.penalty*x[j]*Pr_bx(r[xi]*self.penalty)*(1-Pr_bx(r[xi]*self.penalty))
+                    ddl += self.penalty*x[j]*pr_bx(r[xi]*self.penalty)*(1-pr_bx(r[xi]*self.penalty))
 
                 if j==0:
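Context for the renamed calls: `dl` and `ddl` are the coordinate-wise first derivative and (negated) second derivative of the log-likelihood, which drive the per-coefficient Newton step `dv = dl/ddl` further down. Leaving the anchor-example penalty terms aside:

    \frac{\partial \ell}{\partial \beta_j} = \sum_i x_{ij} (y_i - p_i),
    \qquad
    -\frac{\partial^2 \ell}{\partial \beta_j^2} = \sum_i x_{ij}^2 \, p_i (1 - p_i),
    \qquad
    \beta_j \leftarrow \beta_j + \frac{dl}{ddl}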
     
@@ -739,5 +741,5 @@
                 dv = min(max(dv,-max_delta[j]),max_delta[j])
                 r+= X[:,j]*dv
-                p = array([Pr_bx(ri) for ri in r])
+                p = array([pr_bx(ri) for ri in r])
                 if N_anch:
                     r_anch+=X_anch[:,j]*dv
     
@@ -764,47 +766,5 @@
 #  Feature subset selection for logistic regression
 
-
-def StepWiseFSS(table=None, **kwds):
-    """Implementation of algorithm described in [Hosmer and Lemeshow, Applied Logistic Regression, 2000].
-
-    If :obj:`table` is specified, stepwise logistic regression implemented
-    in :obj:`StepWiseFSS_class` is performed and a list of chosen features
-    is returned. If :obj:`table` is not specified an instance of
-    :obj:`StepWiseFSS_class` with all parameters set is returned.
-
-    :param table: data set
-    :type table: Orange.data.Table
-
-    :param addCrit: "Alpha" level to judge if variable has enough importance to be added in the new set. (e.g. if addCrit is 0.2, then features is added if its P is lower than 0.2)
-    :type addCrit: float
-
-    :param deleteCrit: Similar to addCrit, just that it is used at backward elimination. It should be higher than addCrit!
-    :type deleteCrit: float
-
-    :param numFeatures: maximum number of selected features, use -1 for infinity.
-    :type numFeatures: int
-    :rtype: :obj:`StepWiseFSS_class` or list of features
-
-    """
-
-    """
-      Constructs and returns a new set of table that includes a
-      class and features selected by stepwise logistic regression. This is an
-      implementation of algorithm described in [Hosmer and Lemeshow, Applied Logistic Regression, 2000]
-
-      table: data set (ExampleTable)
-      addCrit: "Alpha" level to judge if variable has enough importance to be added in the new set. (e.g. if addCrit is 0.2, then attribute is added if its P is lower than 0.2)
-      deleteCrit: Similar to addCrit, just that it is used at backward elimination. It should be higher than addCrit!
-      numFeatures: maximum number of selected features, use -1 for infinity
-
-    """
-
-    fss = apply(StepWiseFSS_class, (), kwds)
-    if table is not None:
-        return fss(table)
-    else:
-        return fss
-
-def getLikelihood(fitter, examples):
+def get_likelihood(fitter, examples):
     res = fitter(examples)
     if res[0] in [fitter.OK]: #, fitter.Infinity, fitter.Divergence]:
     
@@ -818,6 +778,8 @@
 
 
-class StepWiseFSS_class(Orange.core.Learner):
-  """ Perform stepwise logistic regression and return a list of the
+class StepWiseFSS(Orange.classification.Learner):
+  """Implementation of algorithm described in [Hosmer and Lemeshow, Applied Logistic Regression, 2000].
+
+  Perform stepwise logistic regression and return a list of the
   most "informative" features. Each step of the algorithm is composed
   of two parts. The first is backward elimination, where each already
     
@@ -838,8 +800,34 @@
   follow a binomial distribution.
 
+  If :obj:`table` is specified, stepwise logistic regression implemented
+  in :obj:`StepWiseFSS` is performed and a list of chosen features
+  is returned. If :obj:`table` is not specified an instance of
+  :obj:`StepWiseFSS` with all parameters set is returned.
+
+  :param table: data set
+  :type table: Orange.data.Table
+
+  :param addCrit: "Alpha" level used to judge whether a variable is important enough to be added to the new set (e.g. if addCrit is 0.2, a feature is added if its P is lower than 0.2).
+  :type addCrit: float
+
+  :param deleteCrit: Similar to addCrit, except that it is used at backward elimination. It should be higher than addCrit!
+  :type deleteCrit: float
+
+  :param numFeatures: maximum number of selected features, use -1 for infinity.
+  :type numFeatures: int
+  :rtype: :obj:`StepWiseFSS` or list of features
+
   """
 
+  def __new__(cls, instances=None, **argkw):
+      self = Orange.classification.Learner.__new__(cls, **argkw)
+      if instances:
+          self.__init__(**argkw)
+          return self.__call__(instances)
+      else:
+          return self
+
+
   def __init__(self, addCrit=0.2, deleteCrit=0.3, numFeatures = -1, **kwds):
-
     self.__dict__.update(kwds)
     self.addCrit = addCrit
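A usage sketch for the new class form (assumed, not from the changeset: the dataset name is a placeholder; the call returns the selected features as a plain list of variables):

    import Orange
    from Orange.classification.logreg import StepWiseFSS

    data = Orange.data.Table("heart_disease")   # placeholder dataset name
    features = StepWiseFSS(data, addCrit=0.2, deleteCrit=0.3, numFeatures=5)
    print [f.name for f in features]            # names of the selected features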
     
@@ -862,5 +850,5 @@
     tempData  = Orange.core.Preprocessor_dropMissing(examples.select(tempDomain))
 
-    ll_Old = getLikelihood(Orange.core.LogRegFitter_Cholesky(), tempData)
+    ll_Old = get_likelihood(Orange.core.LogRegFitter_Cholesky(), tempData)
     ll_Best = -1000000
     length_Old = float(len(tempData))
     
@@ -886,5 +874,5 @@
                 tempData = Orange.core.Preprocessor_dropMissing(examples.select(tempDomain))
 
-                ll_Delete = getLikelihood(Orange.core.LogRegFitter_Cholesky(), tempData)
+                ll_Delete = get_likelihood(Orange.core.LogRegFitter_Cholesky(), tempData)
                 length_Delete = float(len(tempData))
                 length_Avg = (length_Delete + length_Old)/2.0
     
@@ -932,5 +920,5 @@
             tempDomain  = continuizer(Orange.core.Preprocessor_dropMissing(examples.select(tempDomain)))
             tempData = Orange.core.Preprocessor_dropMissing(examples.select(tempDomain))
-            ll_New = getLikelihood(Orange.core.LogRegFitter_Cholesky(), tempData)
+            ll_New = get_likelihood(Orange.core.LogRegFitter_Cholesky(), tempData)
 
             length_New = float(len(tempData)) # get number of examples in tempData to normalize likelihood
     
@@ -965,21 +953,18 @@
 
 
-def StepWiseFSS_Filter(examples = None, **kwds):
-    """
-        check function StepWiseFSS()
-    """
-
-    filter = apply(StepWiseFSS_Filter_class, (), kwds)
-    if examples is not None:
-        return filter(examples)
-    else:
-        return filter
-
-
-class StepWiseFSS_Filter_class(object):
+class StepWiseFSSFilter(object):
+    def __new__(cls, instances=None, **argkw):
+        self = object.__new__(cls, **argkw)
+        if instances:
+            self.__init__(**argkw)
+            return self.__call__(instances)
+        else:
+            return self
+
     def __init__(self, addCrit=0.2, deleteCrit=0.3, numFeatures = -1):
         self.addCrit = addCrit
         self.deleteCrit = deleteCrit
         self.numFeatures = numFeatures
+
     def __call__(self, examples):
         attr = StepWiseFSS(examples, addCrit=self.addCrit, deleteCrit = self.deleteCrit, numFeatures = self.numFeatures)
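And the filter variant, which wraps the same selection; a sketch under the assumption (suggested by the class name and the visible `__call__`) that it returns the data reduced to the chosen features:

    from Orange.classification.logreg import StepWiseFSSFilter

    filtered = StepWiseFSSFilter(data, addCrit=0.2, numFeatures=5)   # table in, reduced table out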