Changeset 8940:39a8cd094c11 in orange
 Timestamp:
 09/09/11 17:59:22 (3 years ago)
 Branch:
 default
 Convert:
 bf9bd75c4a045457f1669baefaaba3110a2f0cc8
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

orange/Orange/regression/pls.py
  r8924 r8940
    146   146
    147   147
+         148  def select_attrs(table, attributes, class_var=None, metas=None):
+         149      """ Select only ``attributes`` from the ``table``.
+         150      """
+         151      domain = Orange.data.Domain(attributes, class_var)
+         152      if metas:
+         153          domain.add_metas(metas)
+         154      return Orange.data.Table(domain, table)
+         155
+         156
    148   157  class PLSRegressionLearner(base.BaseRegressionLearner):
    149   158      """ Fits the partial least squares regression model,
      …     …
    241   250
    242   251          :param xVars, yVars: List of input and response variables
-   243                    (`Orange.data.variable.Continuous` or `Orange.data.variable. Continuous`).
+         252              (`Orange.data.variable.Continuous` or `Orange.data.variable.Discrete`).
    244   253              If None (default) it is assumed that data definition provides information
    245   254              which variables are reponses and which not. If a variable var
      …     …
    249   258
    250   259          """
-   251
+         260          domain = table.domain
+         261          if xVars is None and yVars is None:
+         262              # Response variables are defined in the table.
+         263              label_mask = data_label_mask(domain)
+         264              xVars = [v for v, label in zip(domain, label_mask) if not label]
+         265              yVars = [v for v, label in zip(domain, label_mask) if label]
+         266              x_table = select_attrs(table, xVars)
+         267              y_table = select_attrs(table, yVars)
+         268
+         269          elif xVars and yVars:
+         270              # independent and response variables are passed by the caller
+         271              if domain.class_var and domain.class_var not in yVars:
+         272                  # if the original table contains class variable
+         273                  # add it to the yVars
+         274                  yVars.append(domain.class_var)
+         275              label_mask = [v in yVars for v in domain.variables]
+         276
+         277              x_table = select_attrs(table, xVars)
+         278              y_table = select_attrs(table, yVars)
+         279          else:
+         280              raise ValueError("Both xVars and yVars must be defined.")
+         281
    252   282          # if independent and response variables are not listed in domain
-   253                if xVars is not None:
-   254                    for var in xVars:
-   255                        if table.domain[var].attributes.has_key("label"):
-   256                            del table.domain[var].attributes["label"]
-   257                if yVars is not None:
-   258                    for var in yVars:
-   259                        table.domain[var].attributes["label"] = True
+         283          # if xVars is not None:
+         284          #     for var in xVars:
+         285          #         if table.domain[var].attributes.has_key("label"):
+         286          #             del table.domain[var].attributes["label"]
+         287          # if yVars is not None:
+         288          #     for var in yVars:
+         289          #         table.domain[var].attributes["label"] = True
    260   290
    261   291          # if the original table contains class variable
-   262                if table.domain.class_var is not None:
-   263                    oldClass = table.domain.class_var
-   264                    newDomain = Orange.data.Domain(table.domain.variables, 0)
-   265                    newDomain[oldClass].attributes["label"] = True
-   266                    table = Orange.data.Table(newDomain, table)
+         292          # if table.domain.class_var is not None:
+         293          #     oldClass = table.domain.class_var
+         294          #     newDomain = Orange.data.Domain(table.domain.variables, 0)
+         295          #     newDomain[oldClass].attributes["label"] = True
+         296          #     table = Orange.data.Table(newDomain, table)
    267   297
    268   298          # dicrete values are continuized
-   269                table = self.continuize_table(table)
+         299          x_table = self.continuize_table(x_table)
+         300          y_table = self.continuize_table(y_table)
    270   301          # missing values are imputed
-   271                table = self.impute_table(table)
-   272
-   273                self.domain = table.domain
-   274                label_mask = data_label_mask(table.domain)
-   275                xy = table.toNumpy()[0]
-   276                y, x = xy[:, label_mask], xy[:, ~ label_mask]
-   277                self.yVars = [v for v, m in zip(self.domain.variables, label_mask) if m]
-   278                self.xVars = [v for v in self.domain.variables if v not in self.yVars]
+         302          x_table = self.impute_table(x_table)
+         303          y_table = self.impute_table(y_table)
+         304
+         305          # Collect the new transformed xVars/yVars
+         306          xVars = list(x_table.domain.variables)
+         307          yVars = list(y_table.domain.variables)
+         308
+         309          self.domain = Orange.data.Domain(xVars + yVars, False)
+         310          label_mask = [False for _ in xVars] + [True for _ in yVars]
+         311
+         312          # label_mask = data_label_mask(table.domain)
+         313          # xy = table.toNumpy()[0]
+         314          # y, x = xy[:, label_mask], xy[:, ~ label_mask]
+         315          # self.yVars = [v for v, m in zip(self.domain.variables, label_mask) if m]
+         316          # self.xVars = [v for v in self.domain.variables if v not in self.yVars]
+         317          x = x_table.toNumpy()[0]
+         318          y = y_table.toNumpy()[0]
    279   319
    280   320          self.fit(x, y)
      …     …
    282   322              coefs=self.coefs, muX=self.muX, muY=self.muY, \
    283   323              sigmaX=self.sigmaX, sigmaY=self.sigmaY, \
-   284                    xVars= self.xVars, yVars=self.yVars)
+         324              xVars=xVars, yVars=yVars)
    285   325
    286   326      def fit(self, X, Y):
      …     …
    378   418      """ PLSRegression predicts value of the response variables
    379   419      based on the values of independent variables.
-   380            """
    381   420
+         421      """
    382   422      def __init__(self, label_mask=None, domain=None, \
    383   423                   coefs=None, muX=None, muY=None, sigmaX=None, sigmaY=None, \
      …     …
    394   434          :param instance: data instance for which the value of the response
    395   435              variable will be predicted
-   396                :type instance:
+         436          :type instance: :class:`Orange.data.Instance`
+         437
    397   438          """
    398   439          instance = Orange.data.Instance(self.domain, instance)
-   399                ins = Orange.data.Instance(self.domain, instance)
    400   440          ins = [instance[v].native() for v in self.xVars]
+         441
    401   442          if "?" in ins: # missing value > corresponding coefficient omitted
    402   443              def miss_2_0(x): return x if x != "?" else 0
      …     …
    441   482      y = [var for var in table.domain if var.name[0]=="Y"]
    442   483      print x, y
-   443            c = l(table, xVars=x, yVars=y)
+         484      # c = l(table, xVars=x, yVars=y)
+         485      c = l(table)
    444   486      c.print_pls_regression_coefficients()
Note: See TracChangeset
for help on using the changeset viewer.