Changeset 10307:d6d8086a228f in orange
 Timestamp:
 02/18/12 18:35:59 (2 years ago)
 Branch:
 default
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

Orange/regression/lasso.py
r10294 r10307 138 138 bootTable = Orange.data.Table(table.domain) 139 139 for i in range(n): 140 id = numpy.random.randint(0, n)140 id = numpy.random.randint(0, n) 141 141 bootTable.append(table[id]) 142 142 return bootTable … … 205 205 self.set_imputer(imputer=imputer) 206 206 self.set_continuizer(continuizer=continuizer) 207 208 207 208 209 209 def __call__(self, table, weight=None): 210 210 """ … … 217 217 which stores weights for instances 218 218 219 """ 219 """ 220 220 # dicrete values are continuized 221 221 table = self.continuize_table(table) … … 226 226 X, y, w = table.to_numpy() 227 227 n, m = numpy.shape(X) 228 228 229 229 X, mu_x, sigma_x = standardize(X) 230 230 y, coef0 = center(y) 231 231 232 232 t = self.t 233 233 234 234 if self.s is not None: 235 235 beta_full, rss, _, _ = numpy.linalg.lstsq(X, y) 236 236 t = self.s * numpy.sum(numpy.abs(beta_full)) 237 237 print "t =", t 238 238 239 239 import scipy.optimize 240 240 241 241 # objective function to be minimized 242 242 objective = lambda beta: numpy.linalg.norm(y - numpy.dot(X, beta)) … … 246 246 cnstr = lambda beta: t - numpy.sum(numpy.abs(beta)) 247 247 # optimal solution 248 coefficients = scipy.optimize.fmin_cobyla(objective, beta_init,\ 249 cnstr, disp=0) 248 coefficients = scipy.optimize.fmin_cobyla(objective, beta_init, cnstr) 250 249 251 250 # set small coefficients to 0 … … 253 252 coefficients = numpy.array(map(set_2_0, coefficients)) 254 253 coefficients /= sigma_x 255 254 256 255 # bootstrap estimator of standard error of the coefficient estimators 257 256 # assumption: fixed t … … 277 276 coeff_p.append(c.coefficients) 278 277 p_vals = \ 279 numpy.sum(abs(numpy.array(coeff_p)) >\278 numpy.sum(abs(numpy.array(coeff_p)) > \ 280 279 abs(numpy.array(coefficients)), \ 281 axis=0) /float(self.n_perm)280 axis=0) / float(self.n_perm) 282 281 else: 283 282 p_vals = [float("nan")] * m … … 287 286 dict_model = {} 288 287 for i, var in enumerate(domain.attributes): 289 dict_model[var.name] = 
(coefficients[i], std_errors_fixed_t[i], p_vals[i]) 290 288 dict_model[var.name] = (coefficients[i], std_errors_fixed_t[i], p_vals[i]) 289 291 290 return LassoRegression(domain=domain, class_var=domain.class_var, 292 291 coef0=coef0, coefficients=coefficients, 293 292 std_errors_fixed_t=std_errors_fixed_t, 294 293 p_vals=p_vals, 295 dict_model= 294 dict_model=dict_model, 296 295 mu_x=mu_x) 297 296 298 297 deprecated_members({"nBoot": "n_boot", 299 "nPerm": "n_perm"}, 298 "nPerm": "n_perm"}, 300 299 wrap_methods=["__init__"], 301 300 in_place=True)(LassoRegressionLearner) … … 334 333 Sample mean of the all independent variables. 335 334 336 """ 335 """ 337 336 def __init__(self, domain=None, class_var=None, coef0=None, 338 337 coefficients=None, std_errors_fixed_t=None, p_vals=None, … … 353 352 variable will be predicted 354 353 :type instance: 355 """ 354 """ 356 355 ins = Orange.data.Instance(self.domain, instance) 357 356 if "?" in ins: # missing value -> corresponding coefficient omitted … … 362 361 ins = numpy.array(ins.native())[:-1] - self.mu_x 363 362 364 y_hat = numpy.dot(self.coefficients, ins) + self.coef0 363 y_hat = numpy.dot(self.coefficients, ins) + self.coef0 365 364 y_hat = self.class_var(y_hat) 366 365 dist = Orange.statistics.distribution.Continuous(self.class_var) … … 372 371 else: 373 372 return (y_hat, dist) 374 373 375 374 @deprecated_keywords({"skipZero": "skip_zero"}) 376 375 def to_string(self, skip_zero=True): … … 384 383 :type skip_zero: boolean 385 384 """ 386 385 387 386 from string import join 388 387 labels = ('Variable', 'Coeff Est', 'Std Error', 'p') 389 388 lines = [join(['%10s' % l for l in labels], ' ')] 390 389 391 fmt = "%10s " + join(["%10.3f"] *3, " ") + " %5s"390 fmt = "%10s " + join(["%10.3f"] * 3, " ") + " %5s" 392 391 fmt1 = "%10s %10.3f" 393 392 … … 399 398 else: return " " 400 399 401 stars = 400 stars = get_star(self.p_vals[0]) 402 401 lines.append(fmt1 % ('Intercept', self.coef0)) 403 402 skipped = [] … … 405 404 if 
self.coefficients[i] == 0. and skip_zero: 406 405 skipped.append(self.domain.attributes[i].name) 407 continue 406 continue 408 407 stars = get_star(self.p_vals[i]) 409 lines.append(fmt % (self.domain.attributes[i].name, 410 self.coefficients[i], self.std_errors_fixed_t[i], 408 lines.append(fmt % (self.domain.attributes[i].name, 409 self.coefficients[i], self.std_errors_fixed_t[i], 411 410 self.p_vals[i], stars)) 412 411 lines.append("Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 empty 1") … … 437 436 438 437 import Orange 439 440 table = Orange.data.Table("housing.tab") 438 439 table = Orange.data.Table("housing.tab") 441 440 442 441 c = LassoRegressionLearner(table, t=len(table.domain))
Note: See TracChangeset
for help on using the changeset viewer.