Changeset 8789:b656b27a43bd in orange
 Timestamp:
 08/26/11 09:05:07 (3 years ago)
 Branch:
 default
 Convert:
 44df07f6ca97f59d740d809afab233860f761273
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

orange/Orange/regression/linear.py
r8042 r8789 1 """\ 2 ============================== 3 Linear regression (``linear``) 4 ============================== 5 6 .. index:: regression, linear model 7 8 .. _`Linear regression`: http://en.wikipedia.org/wiki/Linear_regression 9 10 Example :: 11 12 >>> from Orange.regression import linear 13 >>> table = Orange.data.Table("housing") 14 >>> c = linear.LinearRegressionLearner(table) 15 >>> linear.print_linear_regression_model(c) 16 17 Variable Coeff Est Std Error tvalue p 18 Intercept 36.459 5.103 7.144 0.000 *** 19 CRIM 0.108 0.033 3.287 0.001 ** 20 ZN 0.046 0.014 3.382 0.001 *** 21 INDUS 0.021 0.061 0.334 0.738 22 CHAS 2.687 0.862 3.118 0.002 ** 23 NOX 17.767 3.820 4.651 0.000 *** 24 RM 3.810 0.418 9.116 0.000 *** 25 AGE 0.001 0.013 0.052 0.958 26 DIS 1.476 0.199 7.398 0.000 *** 27 RAD 0.306 0.066 4.613 0.000 *** 28 TAX 0.012 0.004 3.280 0.001 ** 29 PTRATIO 0.953 0.131 7.283 0.000 *** 30 B 0.009 0.003 3.467 0.001 *** 31 LSTAT 0.525 0.051 10.347 0.000 *** 32 Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 empty 1 33 34 >>> 35 36 37 .. autoclass:: LinearRegressionLearner 38 :members: 39 40 .. autoclass:: LinearRegression 41 :members: 42 43 Utility functions 44  45 46 .. autofunction:: print_linear_regression_model 47 48 .. autofunction:: stepwise 49 50 51 """ 52 53 54 import Orange 55 from Orange.regression import base 1 56 import numpy 57 import scipy.stats 58 2 59 from numpy import dot, sqrt 3 from numpy.linalg import inv 4 5 import Orange 6 import statc 7 8 9 ######################################################################## 10 # Linear Regression 11 12 class LinearRegressionLearner(object): 13 def __new__(self, data=None, name='linear regression', **kwds): 14 learner = object.__new__(self, **kwds) 15 if data: 16 learner.__init__(name,**kwds) # force init 17 return learner(data) 18 else: 19 return learner # invokes the __init__ 20 21 def __init__(self, name='linear regression', beta0=True, 22 use_attributes=None, stepwise=False, add_sig=0.05, 23 remove_sig=0.2, stepwise_before=True, **kw): 60 from numpy.linalg import inv, pinv 61 62 63 class LinearRegressionLearner(base.BaseRegressionLearner): 64 65 """Fits the linear regression model, i.e. learns the regression parameters 66 The class is derived from 67 :class:`Orange.regression.base.BaseRegressionLearner` 68 which is used for preprocessing the data (continuization and imputation) 69 before fitting the regression parameters 70 71 .. attribute:: F 72 73 Fstatistics of the model. 74 75 .. attribute:: coefficients 76 77 list of regression coefficients. If the intercept is included 78 the first item corresponds to the estimated intercept 79 80 .. attribute:: stdError 81 82 list of standard errors of the coefficient estimator. 83 84 .. attribute:: tScores 85 86 list of tscores for the estimated regression coefficients 87 88 .. attribute:: pVals 89 90 list of pvalues for the null hypothesis that the regression 91 coefficients equal 0 based on tscores and two sided 92 alternative hypothesis 93 94 .. attribute:: dictModel 95 96 dictionary of statistical properties of the model. 97 Keys  names of the independent variables (or "Intercept") 98 Values  tuples (coefficient, standard error, 99 tvalue, pvalue) 100 101 .. attribute:: fitted 102 103 estimated values of the dependent variable for all instances 104 from the table 105 106 .. attribute:: residuals 107 108 differences between estimated and actual values of the 109 dependent variable for all instances from the table 110 111 .. attribute:: m 112 113 number of independent variables 114 115 .. attribute:: n 116 117 number of instances 118 119 .. attribute:: muY 120 121 the sample mean of the dependent variable 122 123 .. attribute:: r2 124 125 _`coefficient of determination`: 126 http://en.wikipedia.org/wiki/Coefficient_of_determination 127 128 .. attribute:: r2adj 129 130 adjusted coefficient of determination 131 132 .. attribute:: sst, sse, ssr 133 134 total sum of squares, explained sum of squares and 135 residual sum of squares respectively 136 137 .. attribute:: stdCoefficients 138 139 standardized regression coefficients 140 141 142 """ 143 144 def __init__(self, name='linear regression', intercept=True, \ 145 computeStats=True, ridgeLambda=None,\ 146 imputer=None, continuizer=None, \ 147 useVars=None, stepwise=False, addSig=0.05, 148 removeSig=0.2, **kwds): 149 """ 150 :param name: name of the linear model, default 'linear regression' 151 :type name: string 152 :param intercept: if True, the intercept beta0 is included 153 in the model 154 :type intercept: boolean 155 :param computeStats: if True, statistical properties of 156 the estimators (standard error, tscores, significances) 157 and statistical properties of the model 158 (sum of squares, R2, adjusted R2) are computed 159 :type computeStats: boolean 160 :param ridgeLambda: if not None, the lambda parameter 161 in ridge regression 162 :type ridgeLambda: integer or None 163 :param useVars: the list of independent varaiables included in 164 regression model. If None (default) all variables are used 165 :type useVars: list of Orange.data.variable or None 166 :param stepwise: if True, _`stepwise regression`: 167 http://en.wikipedia.org/wiki/Stepwise_regression 168 based on Ftest is performed. The significance parameters are 169 addSig and removeSig 170 :type stepwise: boolean 171 :param addSig: lower bound of significance for which the variable 172 is included in regression model 173 default value = 0.05 174 :type addSig: float 175 :param removeSig: upper bound of significance for which 176 the variable is excluded from the regression model 177 default value = 0.2 178 :type removeSig: float 179 """ 24 180 self.name = name 25 self.beta0 = beta0 181 self.intercept = intercept 182 self.computeStats = computeStats 183 self.ridgeLambda = ridgeLambda 184 self.set_imputer(imputer=imputer) 185 self.set_continuizer(continuizer=continuizer) 26 186 self.stepwise = stepwise 27 self.stepwise_before = stepwise_before 28 self.add_sig = add_sig 29 self.remove_sig = remove_sig 30 self.use_attributes = use_attributes 31 self.__dict__.update(kw) 32 33 def __call__(self, data, weight=None): 34 if not self.use_attributes == None: 35 new_domain = Orange.data.Domain(self.use_attributes, 36 data.domain.class_var) 37 new_domain.addmetas(data.domain.getmetas()) 38 data = Orange.data.Table(new_domain, data) 39 40 if self.stepwise and self.stepwise_before: 41 use_attributes=stepwise(data, weight, add_sig=self.add_sig, 42 remove_sig=self.remove_sig) 43 new_domain = Orange.data.Domain(use_attributes, data.domain.class_var) 44 new_domain.addmetas(data.domain.getmetas()) 45 data = Orange.data.Table(new_domain, data) 46 47 # continuization (replaces discrete with continuous attributes) 48 continuizer = Orange.feature.continuization.DomainContinuizer() 49 continuizer.multinomial_treatment = continuizer.FrequentIsBase 50 continuizer.zero_based = True 51 domain0 = continuizer(data) 52 data = data.translate(domain0) 53 54 if self.stepwise and not self.stepwise_before: 55 use_attributes = stepwise(data, weight, add_sig=self.add_sig, 56 remove_sig=self.remove_sig) 57 new_domain = Orange.data.Domain(use_attributes, data.domain.class_var) 58 new_domain.addmetas(data.domain.getmetas()) 59 data = Orange.data.Table(new_domain, data) 187 self.addSig = addSig 188 self.removeSig = removeSig 189 self.useVars = useVars 190 self.__dict__.update(kwds) 60 191 61 # missing values handling (impute missing) 62 imputer = Orange.feature.imputation.ImputerConstructor_model() 63 imputer.learner_continuous = Orange.regression.mean.MeanLearner() 64 imputer.learner_discrete = Orange.classification.majority.MajorityLearner() 65 imputer = imputer(data) 66 data = imputer(data) 192 def __call__(self, table, weight=None, verbose=0): 193 """ 194 :param table: data instances. 195 :type table: :class:`Orange.data.Table` 196 :param weight: the weights for instances. Default: None, i.e. 197 all data instances are eqaully important in fitting 198 the regression parameters 199 :type weight: None or list of Orange.data.variable.Continuous 200 which stores weights for instances 201 """ 202 if not self.useVars is None: 203 newDomain = Orange.data.Domain(self.useVars, 204 table.domain.class_var) 205 newDomain.addmetas(table.domain.getmetas()) 206 table = Orange.data.Table(newDomain, table) 207 208 # dicrete values are continuized 209 table = self.continuize_table(table) 210 211 # missing values are imputed 212 table = self.impute_table(table) 213 214 if self.stepwise: 215 useVars = stepwise(table, weight, addSig=self.addSig, 216 removeSig=self.removeSig) 217 newDomain = Orange.data.Domain(useVars, table.domain.class_var) 218 newDomain.addmetas(table.domain.getmetas()) 219 table = Orange.data.Table(newDomain, table) 67 220 68 221 # convertion to numpy 69 A, y, w = data.to_numpy() # weights ?? 70 if A == None: 71 n = len(data) 72 m = 0 222 A, y, w = table.to_numpy() 223 if A is None: 224 n, m = len(table), 0 73 225 else: 74 226 n, m = numpy.shape(A) 75 227 76 if self. beta0 == True:77 if A ==None:78 X = numpy.ones([ len(data),1])228 if self.intercept: 229 if A is None: 230 X = numpy.ones([n,1]) 79 231 else: 80 232 X = numpy.insert(A, 0, 1, axis=1) # adds a column of ones … … 82 234 X = A 83 235 84 # set weights 85 W = numpy.identity(len(data)) 236 self.domain, self.m, self.n = table.domain, m, n 237 238 if numpy.std(y) < 10e6: # almost constant variable 239 return Orange.regression.mean.MeanLearner(table) 240 241 # set weights to the instances 242 W = numpy.identity(n) 86 243 if weight: 87 for di, d in enumerate(data):88 W[ di, di] = float(d[weight])244 for i, ins in enumerate(table): 245 W[i, i] = float(ins[weight]) 89 246 90 247 # adds some robustness by computing the pseudo inverse; 91 248 # normal inverse could fail due to singularity of the X.T * W * X 92 D = dot(dot(numpy.linalg.pinv(dot(dot(X.T, W), X)), X.T), W) 93 beta = dot(D, y) 94 95 yEstimated = dot(X, beta) # estimation 96 # some desriptive statistisc 97 muY, sigmaY = numpy.mean(y), numpy.std(y) 98 muX, covX = numpy.mean(X, axis=0), numpy.cov(X, rowvar=0) 99 100 # model statistics 101 SST = numpy.sum((y  muY) ** 2) 102 SSR = numpy.sum((yEstimated  muY) ** 2) 103 SSE, RSquare = SST  SSR, SSR / SST 104 R = numpy.sqrt(RSquare) # coefficient of determination 105 RAdjusted = 1  (1  RSquare) * (n  1) / (n  m  1) 106 F = (SSR / m) / (SST  SSR / (n  m  1)) # F statistisc 107 df = m  1 108 109 sigmaSquare = SSE / (n  m  1) 110 111 # standard error of estimated coefficients 112 errCoeff = sqrt(sigmaSquare * inv(dot(X.T, X)).diagonal()) 113 114 # t statistisc, significance 115 t = beta / errCoeff 116 df = n  2 117 significance = [] 118 for tt in t: 119 try: 120 significance.append(statc.betai(df * 0.5, 0.5, 121 df / (df + tt * tt))) 122 except: 123 significance.append(1.0) 124 125 # standardized coefficients 126 if m > 0: 127 stdCoeff = (sqrt(covX.diagonal()) / sigmaY) * beta 249 if self.ridgeLambda is None: 250 cov = pinv(dot(dot(X.T, W), X)) 128 251 else: 129 stdCoeff = (sqrt(covX) / sigmaY) * beta 130 131 model = {'descriptives': { 'meanX': muX, 'covX': covX, 'meanY': muY, 132 'sigmaY': sigmaY}, 133 'model' : {'estCoeff': beta, 'stdErrorEstimation': errCoeff}, 134 'model summary': {'TotalVar': SST, 'ExplVar': SSE, 135 'ResVar': SSR, 'R': R, 'RAdjusted': RAdjusted, 136 'F': F, 't': t, 'sig': significance}} 137 return LinearRegression(statistics=model, domain=data.domain, 138 name=self.name, beta0=self.beta0, imputer=imputer) 252 cov = pinv(dot(dot(X.T, W), X)  self.ridgeLambda*numpy.eye(m+1)) 253 self.computeStats = False # TO DO: find inferential properties of the estimators 254 D = dot(dot(cov, X.T), W) 255 self.coefficients = dot(D, y) 256 257 self.muY, sigmaY = numpy.mean(y), numpy.std(y) 258 if A is not None: 259 covX = numpy.cov(X, rowvar=0) 260 261 # standardized coefficients 262 self.stdCoefficients = (sqrt(covX.diagonal()) / sigmaY) \ 263 * self.coefficients 264 265 if self.computeStats is False: 266 return LinearRegression(self) 267 268 self.fitted = dot(X, self.coefficients) 269 self.residuals = [ins.get_class()self.fitted[i] \ 270 for i, ins in enumerate(table)] 271 272 # model summary 273 # total sum of squares (total variance) 274 self.sst = numpy.sum((y  self.muY) ** 2) 275 # sum of squares due to regression (explained variance) 276 self.ssr = numpy.sum((self.fitted  self.muY)**2) 277 # eror sum of squares (unexplaied variance) 278 self.sse = self.sst  self.ssr 279 # coefficient of determination 280 self.r2 = self.ssr / self.sst 281 self.r2adj = 1(1self.r2)*(n1)/(nm1) 282 self.F = (self.ssr/m)/(self.sstself.ssr/(nm1)) 283 df = n2 284 sigmaSquare = self.sse/(nm1) 285 # standard error of the regression estimator, tscores and pvalues 286 self.stdError = sqrt(sigmaSquare*pinv(dot(X.T, X)).diagonal()) 287 self.tScores = self.coefficients/self.stdError 288 self.pVals=[scipy.stats.betai(df*0.5,0.5,df/(df + t*t)) \ 289 for t in self.tScores] 290 291 # dictionary of regression coefficients with standard errors 292 # and pvalues 293 self.dictModel = {} 294 if self.intercept: 295 self.dictModel["Intercept"] = (self.coefficients[0],\ 296 self.stdError[0], \ 297 self.tScores[0], \ 298 self.pVals[0]) 299 for i, var in enumerate(self.domain.attributes): 300 j = i+1 if self.intercept else i 301 self.dictModel[var.name] = (self.coefficients[j], \ 302 self.stdError[j],\ 303 self.tScores[j],\ 304 self.pVals[j]) 305 306 return LinearRegression(self) 307 139 308 140 309 class LinearRegression(Orange.classification.Classifier): 141 def __init__(self, **kwds): 142 for a, b in kwds.items(): 143 self.setattr(a, b) 144 self.beta = self.statistics['model']['estCoeff'] 145 146 def __call__(self, example, resultType=Orange.classification.Classifier.GetValue): 147 ex = Orange.data.Instance(self.domain, example) 148 ex = self.imputer(ex) 149 ex = numpy.array(ex.native()) 150 151 if self.beta0: 152 if len(self.beta) > 1: 153 yhat = self.beta[0] + dot(self.beta[1:], ex[:1]) 310 311 """Linear regression predicts value of the response variable 312 based on the values of independent variables. 313 314 .. attribute:: model 315 316 fitted linear regression model 317 318 """ 319 320 321 322 def __init__(self, model): 323 """ 324 :param model: fitted linear regression model 325 :type model: :class:`LinearRegressionLearner` 326 """ 327 self.model = model 328 329 def __call__(self, instance, \ 330 resultType=Orange.classification.Classifier.GetValue): 331 """ 332 :param instance: data instance for which the value of the response 333 variable will be predicted 334 :type instance: 335 """ 336 ins = Orange.data.Instance(self.model.domain, instance) 337 ins = numpy.array(ins.native()) 338 if "?" in ins: # missing value > corresponding coefficient omitted 339 def miss_2_0(x): return x if x != "?" else 0 340 ins = map(miss_2_0, ins) 341 342 if self.model.intercept: 343 if len(self.model.coefficients) > 1: 344 yHat = self.model.coefficients[0] + \ 345 dot(self.model.coefficients[1:], ins[:1]) 154 346 else: 155 yhat = self.beta[0] 347 if len(ins) == 1: 348 print ins 349 yHat = self.model.muY 350 else: 351 yHat = dot(self.model.coefficients, ins[:1]) 156 352 else: 157 y hat = dot(self.beta, ex[:1])158 y hat = Orange.data.Value(yhat)353 yHat = dot(self.model.coefficients, ins[:1]) 354 yHat = Orange.data.Value(yHat) 159 355 160 356 if resultType == Orange.classification.Classifier.GetValue: 161 return y hat357 return yHat 162 358 if resultType == Orange.classification.Classifier.GetProbabilities: 163 return Orange.statistics.distribution.Continuous({1.0: yhat}) 164 return (yhat, Orange.statistics.distribution.Continuous({1.0: yhat})) 165 166 def __str__(self): 167 err = self.statistics['model']['stdErrorEstimation'] 168 t = self.statistics['model summary']['t'] 169 sig = self.statistics['model summary']['sig'] 170 171 s = ' '.join(['%10s' % l for l in 172 ('Variable', 'Coeff Est', 'Std Error', 'tvalue', 'p')]) 173 174 fmt = '\n%10s ' + ' '.join(["%10.3f"] * 4) 175 if self.beta0 == True: 176 s += fmt % ('Constant', self.beta[0], err[0], t[0], sig[0]) 177 for i in range(len(self.domain.attributes)  1): 178 s += fmt % (self.domain.attributes[i].name, 179 self.beta[i + 1], err[i + 1], t[i + 1], sig[i + 1]) 180 else: 181 for i in range(len(self.domain.attributes)  1): 182 s += fmt % (self.domain.attributes[i].name, 183 self.beta[i], err[i], t[i], sig[i]) 184 return s 185 186 def get_sig(m1, m2, n): 187 if m1 == None or m2 == None: 359 return Orange.statistics.distribution.Continuous({1.0: yHat}) 360 return (yHat, Orange.statistics.distribution.Continuous({1.0: yHat})) 361 362 363 def print_linear_regression_model(lr): 364 """Prettyprints linear regression model, 365 i.e. estimated regression coefficients with standard errors, tscores 366 and significances. 367 368 :param lr: a linear regression model object. 369 :type lr: :class:`LinearRegression` 370 371 """ 372 from string import join 373 m = lr.model 374 labels = ('Variable', 'Coeff Est', 'Std Error', 'tvalue', 'p') 375 print join(['%10s' % l for l in labels], ' ') 376 377 fmt = "%10s " + join(["%10.3f"]*4, " ") + " %5s" 378 379 def get_star(p): 380 if p < 0.001: return "*"*3 381 elif p < 0.01: return "*"*2 382 elif p < 0.05: return "*" 383 elif p < 0.1: return "." 384 else: return " " 385 386 if m.intercept == True: 387 stars = get_star(m.pVals[0]) 388 print fmt % ('Intercept', m.coefficients[0], \ 389 m.stdError[0], m.tScores[0], m.pVals[0], stars) 390 for i in range(len(m.domain.attributes)): 391 stars = get_star(m.pVals[i+1]) 392 print fmt % (m.domain.attributes[i].name,\ 393 m.coefficients[i+1], m.stdError[i+1],\ 394 m.tScores[i+1], m.pVals[i+1], stars) 395 else: 396 for i in range(len(m.domain.attributes)): 397 stars = get_star(m.pVals[i]) 398 print fmt % (m.domain.attributes[i].name,\ 399 m.coefficients[i], m.stdError[i],\ 400 m.tScores[i], m.pVals[i], stars) 401 print "Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 empty 1" 402 403 404 405 def compare_models(c1, c2): 406 """ Compares if classifiaction model c1 is significantly better 407 than model c2. The comparison is based on Ftest, the pvalue 408 is returned. 409 410 :param c1, c2: linear regression model objects. 411 :type lr: :class:`LinearRegression` 412 413 """ 414 if c1 == None or c2 == None: 188 415 return 1.0 189 p1, p2 = len(m1.domain.attributes), len(m2.domain.attributes) 190 RSS1 = m1.statistics["model summary"]["ExplVar"] 191 RSS2 = m2.statistics["model summary"]["ExplVar"] 416 p1, p2, n = c1.model.m, c2.model.m, c1.model.n 417 RSS1, RSS2 = c1.model.sse, c2.model.sse 192 418 if RSS1 <= RSS2 or p2 <= p1 or n <= p2 or RSS2 <= 0: 193 419 return 1.0 194 F = ((RSS1  RSS2) / (p2  p1)) / (RSS2 / (n  p2)) 195 return statc.fprob(int(p2  p1), int(n  p2), F) 196 197 def stepwise(data, weight, add_sig=0.05, remove_sig=0.2): 198 inc_atts = [] 199 not_inc_atts = [at for at in data.domain.attributes] 200 201 changed_model = True 202 while changed_model: 203 changed_model = False 420 F = ((RSS1RSS2)/(p2p1))/(RSS2/(np2)) 421 return scipy.stats.fprob(int(p2p1), int(np2), F) 422 423 424 def stepwise(table, weight, addSig=0.05, removeSig=0.2): 425 """ Performs _`stepwise linear regression`: 426 http://en.wikipedia.org/wiki/Stepwise_regression 427 on table and returns the list of remaing independent variables 428 which fit a significant linear regression model.coefficients 429 430 :param table: data instances. 431 :type table: :class:`Orange.data.Table` 432 :param weight: the weights for instances. Default: None, i.e. all data 433 instances are eqaully important in fitting the regression parameters 434 :type weight: None or list of Orange.data.variable.Continuous 435 which stores the weights 436 :param addSig: lower bound of significance for which the variable 437 is included in regression model 438 default value = 0.05 439 :type addSig: float 440 :param removeSig: upper bound of significance for which the variable 441 is excluded from the regression model 442 default value = 0.2 443 :type removeSig: float 444 """ 445 446 447 incVars = [] 448 notIncVars = table.domain.attributes 449 450 changedModel = True 451 while changedModel: 452 changedModel = False 204 453 # remove all unsignificant conditions (learn several models, 205 # where each time one attribute is removed and check significance)206 orig_lin_reg = LinearRegressionLearner(data, use_attributes=inc_atts)207 reduced _lin_reg = []208 for ati in range(len(inc _atts)):454 # where each time one variable is removed and check significance) 455 c0 = LinearRegressionLearner(table, useVars=incVars) 456 reducedModel = [] # reduced model 457 for ati in range(len(incVars)): 209 458 try: 210 reduced _lin_reg.append(LinearRegressionLearner(data, weight,211 use _attributes=inc_atts[:ati] + inc_atts[(ati + 1):]))459 reducedModel.append(LinearRegressionLearner(table, weight, 460 useVars=incVars[:ati] + incVars[(ati + 1):])) 212 461 except: 213 reduced _lin_reg.append(None)462 reducedModel.append(None) 214 463 215 sigs = [ get_sig(r, orig_lin_reg, len(data)) for r in reduced_lin_reg]216 if sigs and max(sigs) > remove _sig:217 # remove that attribute, start again218 crit _att = inc_atts[sigs.index(max(sigs))]219 not _inc_atts.append(crit_att)220 inc _atts.remove(crit_att)221 changed _model = True464 sigs = [compare_models(r, c0) for r in reducedModel] 465 if sigs and max(sigs) > removeSig: 466 # remove that variable, start again 467 critVar = incVars[sigs.index(max(sigs))] 468 notIncVars.append(critVar) 469 incVars.remove(critVar) 470 changedModel = True 222 471 continue 223 472 224 473 # add all significant conditions (go through all attributes in 225 # not _inc_atts, is there one that significantly improves the model?226 more_complex_lin_reg= []227 for ati in range(len(not _inc_atts)):474 # notIncVars, is there one that significantly improves the model? 475 extendedModel = [] 476 for ati in range(len(notIncVars)): 228 477 try: 229 more_complex_lin_reg.append(LinearRegressionLearner(data,230 weight, use _attributes=inc_atts + [not_inc_atts[ati]]))478 extendedModel.append(LinearRegressionLearner(table, 479 weight, useVars=incVars + [notIncVars[ati]])) 231 480 except: 232 more_complex_lin_reg.append(None) 233 234 sigs = [get_sig(orig_lin_reg, r, len(data)) for r in more_complex_lin_reg] 235 if sigs and min(sigs) < add_sig: 236 best_att = not_inc_atts[sigs.index(min(sigs))] 237 inc_atts.append(best_att) 238 not_inc_atts.remove(best_att) 239 changed_model = True 240 return inc_atts 241 481 extendedModel.append(None) 482 483 sigs = [compare_models(c0, r) for r in extendedModel] 484 if sigs and min(sigs) < addSig: 485 bestVar = notIncVars[sigs.index(min(sigs))] 486 incVars.append(bestVar) 487 notIncVars.remove(bestVar) 488 changedModel = True 489 return incVars 490 491 492 if __name__ == "__main__": 493 494 import Orange 495 from Orange.regression import linear 496 497 table = Orange.data.Table("housing.tab") 498 c = LinearRegressionLearner(table) 499 print_linear_regression_model(c)
Note: See TracChangeset
for help on using the changeset viewer.