Changeset 10643:79cc41d29eb7 in orange
 Timestamp:
 03/26/12 15:48:33 (2 years ago)
 Branch:
 default
 rebase_source:
 88f2d0bbd5c094dd32667be3498d631f76c5c883
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

Orange/regression/linear.py
r10616 r10643 148 148 before fitting the regression parameters. 149 149 150 """ 150 """ 151 151 152 152 def __init__(self, name='linear regression', intercept=True, … … 197 197 self.use_vars = use_vars 198 198 self.__dict__.update(kwds) 199 199 200 200 def __call__(self, table, weight=None, verbose=0): 201 201 """ … … 207 207 :type weight: None or list of Orange.feature.Continuous 208 208 which stores weights for instances 209 """ 209 """ 210 210 if self.use_vars is not None: 211 211 new_domain = Orange.data.Domain(self.use_vars, … … 214 214 table = Orange.data.Table(new_domain, table) 215 215 216 # discrete values are continuized 216 # discrete values are continuized 217 217 table = self.continuize_table(table) 218 218 219 219 # missing values are imputed 220 220 table = self.impute_table(table) … … 232 232 X, y, w = table.to_numpy() 233 233 n, m = numpy.shape(X) 234 234 235 235 if self.intercept: 236 236 X = numpy.insert(X, 0, 1, axis=1) # adds a column of ones 237 237 238 238 if weight: 239 239 weights = numpy.sqrt([float(ins[weight]) for ins in table]) 240 240 X = weights.reshape(n, 1) * X 241 241 y = weights * y 242 242 243 243 cov = dot(X.T, X) 244 244 245 245 if self.ridge_lambda: 246 246 stride = cov.shape[0] + 1 … … 265 265 coefficients=coefficients, std_coefficients=std_coefficients, 266 266 intercept=self.intercept) 267 268 267 269 268 fitted = dot(X, coefficients) … … 271 270 for i, ins in enumerate(table)] 272 271 273 # model summary 272 # model summary 273 df_reg = n - m - self.intercept 274 274 # total sum of squares (total variance) 275 275 sst = numpy.sum((y - mu_y) ** 2) 276 # sum of squares due to regression(explained variance) 276 # regression sum of squares (explained variance) 277 277 ssr = numpy.sum((fitted - mu_y) ** 2) 278 # error sum of squares (unexplaied variance) 279 sse = sst - ssr 278 # residual sum of squares 279 sse = numpy.sum((y - fitted) ** 2) 280 280 # coefficient of determination 281 281 r2 = ssr / sst 282 r2adj = 1 - (1 - r2) * (n - 1) /
(n - m - 1) 283 F = (ssr / m) / (sst - ssr / (n - m - 1)) if m else None 284 df = n - 2 285 sigma_square = sse / (n - m - 1) 282 r2 = 1 - sse / sst 283 r2adj = 1 - (1 - r2) * (n - 1) / df_reg 284 F = (ssr / m) / ((sst - ssr) / df_reg) if m else 0 285 sigma_square = sse / df_reg 286 286 # standard error of the regression estimator, t-scores and p-values 287 287 std_error = sqrt(sigma_square * invcov.diagonal()) 288 288 t_scores = coefficients / std_error 289 p_vals = [stats.betai(df * 0.5, 0.5, df / (df + t * t)) 289 df_res = n - 2 290 p_vals = [stats.betai(df_res * 0.5, 0.5, df_res / (df_res + t * t)) 290 291 for t in t_scores] 291 292 … … 300 301 dict_model[var.name] = (coefficients[j], std_error[j], 301 302 t_scores[j], p_vals[j]) 302 303 303 304 return LinearRegression(domain.class_var, domain, coefficients, F, 304 305 std_error=std_error, t_scores=t_scores, p_vals=p_vals, … … 323 324 324 325 .. attribute:: F 325 326 326 327 F-statistics of the model. 327 328 … … 333 334 .. attribute:: std_error 334 335 335 Standard errors of the coefficient estimator, stored in list. 336 Standard errors of the coefficient estimator, stored in list. 336 337 337 338 .. attribute:: t_scores 338 339 339 List of t-scores for the estimated regression coefficients. 340 List of t-scores for the estimated regression coefficients. 340 341 341 342 .. attribute:: p_vals … … 343 344 List of p-values for the null hypothesis that the regression 344 345 coefficients equal 0 based on t-scores and two sided 345 alternative hypothesis. 346 alternative hypothesis. 346 347 347 348 .. attribute:: dict_model … … 364 365 .. attribute:: m 365 366 366 Number of independent (predictor) variables. 367 Number of independent (predictor) variables. 367 368 368 369 .. attribute:: n 369 370 370 Number of instances. 371 Number of instances. 371 372 372 373 .. attribute:: mu_y 373 374 374 Sample mean of the dependent variable. 375 Sample mean of the dependent variable. 375 376 376 377 .. 
attribute:: r2 … … 393 394 Standardized regression coefficients. 394 395 395 """ 396 397 398 396 """ 397 399 398 def __init__(self, class_var=None, domain=None, coefficients=None, F=None, 400 399 std_error=None, t_scores=None, p_vals=None, dict_model=None, … … 433 432 variable will be predicted 434 433 :type instance: :obj:`~Orange.data.Instance` 435 """ 434 """ 436 435 ins = Orange.data.Instance(self.domain, instance) 437 436 ins = numpy.array(ins.native()) … … 462 461 return (y_hat, dist) 463 462 464 465 463 def to_string(self): 466 464 """Pretty-prints linear regression model, … … 469 467 470 468 """ 471 from string import join 469 from string import join 472 470 labels = ('Variable', 'Coeff Est', 'Std Error', 't-value', 'p') 473 471 lines = [join(['%10s' % l for l in labels], ' ')] … … 482 480 elif p < 0.1: return "." 483 481 else: return " " 484 482 485 483 if self.intercept == True: 486 484 stars = get_star(self.p_vals[0]) … … 512 510 513 511 :param c1, c2: linear regression model objects. 514 :type lr: :class:`LinearRegression` 512 :type lr: :class:`LinearRegression` 515 513 516 514 """ … … 548 546 """ 549 547 550 551 548 inc_vars = [] 552 549 not_inc_vars = table.domain.attributes … … 565 562 except Exception: 566 563 reduced_model.append(None) 567 564 568 565 sigs = [compare_models(r, c0) for r in reduced_model] 569 566 if sigs and max(sigs) > remove_sig: … … 584 581 except Exception: 585 582 extended_model.append(None) 586 583 587 584 sigs = [compare_models(c0, r) for r in extended_model] 588 585 if sigs and min(sigs) < add_sig:
Note: See TracChangeset
for help on using the changeset viewer.