Changeset 9248:2bba59e63e7b in orange
 Timestamp:
 11/23/11 12:52:02 (2 years ago)
 Branch:
 default
 Convert:
 445b95bd1012dab1f4e056dd73d66f8553c13cb2
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

orange/Orange/regression/pls.py
r9246 r9248 28 28 >>> # In such situation x and y do not need to be specified. 29 29 >>> l = pls.PLSRegressionLearner() 30 >>> c = l(table, x Vars=x, yVars=y)30 >>> c = l(table, x_vars=x, y_vars=y) 31 31 >>> c.print_pls_regression_coefficients() 32 32 Y1 Y2 Y3 Y4 … … 61 61 from Orange.regression.earth import data_label_mask 62 62 from numpy import dot, zeros 63 from numpy import linalg 63 64 from numpy.linalg import svd, pinv 65 66 from Orange.misc import deprecated_members, deprecated_keywords 67 64 68 65 69 def normalize_matrix(X): … … 73 77 74 78 """ 75 muX, sigmaX = numpy.mean(X, axis=0), numpy.std(X, axis=0) 76 sigmaX[sigmaX == 0] = 1. 77 return (XmuX)/sigmaX, muX, sigmaX 78 79 def nipals_xy(X, Y, mode="PLS", maxIter=500, tol=1e06): 79 mu_x, sigma_x = numpy.mean(X, axis=0), numpy.std(X, axis=0) 80 sigma_x[sigma_x == 0] = 1. 81 return (X  mu_x)/sigma_x, mu_x, sigma_x 82 83 @deprecated_keywords({"maxIter": "max_iter"}) 84 def nipals_xy(X, Y, mode="PLS", max_iter=500, tol=1e06): 80 85 """ 81 86 NIPALS algorithm. Returns the first left and rigth singular … … 88 93 :type mode: string 89 94 90 :param max Iter: maximal number of iterations (default 500)91 :type max Iter: int95 :param max_iter: maximal number of iterations (default 500) 96 :type max_iter: int 92 97 93 98 :param tol: tolerance parameter, if norm of difference … … 100 105 Xpinv = Ypinv = None 101 106 # Inner loop of the Wold algo. 102 while True and ite < max Iter:107 while True and ite < max_iter: 103 108 # Update u: the X weights 104 109 if mode == "CCA": … … 135 140 136 141 def svd_xy(X, Y): 137 """ Returns the first left and rig thsingular142 """ Returns the first left and right singular 138 143 vectors of X'Y. 139 144 140 145 :param X, Y: data matrix 141 146 :type X, Y: :class:`numpy.array` 147 142 148 """ 143 149 U, s, V = svd(dot(X.T, Y), full_matrices=False) … … 165 171 which is used for preprocessing the data (continuization and imputation) 166 172 before fitting the regression parameters 167 168 Basic notations: 169 n  number of data instances 170 p  number of independent variables 171 q  number of reponse variables 172 173 .. attribute:: T 174 175 A n x nComp numpy array of xscores 176 177 .. attribute:: U 178 179 A n x nComp numpy array of yscores 180 181 .. attribute:: W 182 183 A p x nComp numpy array of xweights 184 185 .. attribute:: C 186 187 A q x nComp numpy array of yweights 188 189 .. attribute:: P 190 191 A p x nComp numpy array of xloadings 192 193 .. attribute:: Q 194 195 A q x nComp numpy array of yloading 196 197 .. attribute:: coefs 198 199 A p x q numpy array coefficients 200 of the linear model: Y = X coefs + E 201 202 .. attribute:: xVars 203 204 list of independent variables 205 206 .. attribute:: yVars 207 208 list of response variables 209 210 211 """ 212 213 def __init__(self, nComp=2, deflationMode="regression", mode="PLS", 214 algorithm="nipals", maxIter=500, 173 174 """ 175 176 def __init__(self, n_comp=2, deflation_mode="regression", mode="PLS", 177 algorithm="nipals", max_iter=500, 215 178 imputer=None, continuizer=None, 216 179 **kwds): 217 180 """ 218 .. attribute:: n Comp181 .. attribute:: n_comp 219 182 220 183 number of components to keep. Default: 2 221 184 222 .. attribute:: deflation Mode185 .. attribute:: deflation_mode 223 186 224 187 "canonical" or "regression" (default) … … 236 199 237 200 """ 238 self.n Comp = nComp239 self.deflation Mode = deflationMode201 self.n_comp = n_comp 202 self.deflation_mode = deflation_mode 240 203 self.mode = mode 241 204 self.algorithm = algorithm 242 self.max Iter = maxIter205 self.max_iter = max_iter 243 206 self.set_imputer(imputer=imputer) 244 207 self.set_continuizer(continuizer=continuizer) 245 208 self.__dict__.update(kwds) 246 209 247 def __call__(self, table, weight_id=None, xVars=None, yVars=None): 210 @deprecated_keywords({"xVars": "x_vars", "yVars": "y_vars"}) 211 def __call__(self, table, weight_id=None, x_vars=None, y_vars=None): 248 212 """ 249 213 :param table: data instances. 250 214 :type table: :class:`Orange.data.Table` 251 215 252 :param x Vars, yVars: List of input and response variables216 :param x_vars, y_vars: List of input and response variables 253 217 (`Orange.data.variable.Continuous` or `Orange.data.variable.Discrete`). 254 218 If None (default) it is assumed that data definition provides information … … 256 220 has key "label" in dictionary Orange.data.Domain[var].attributes 257 221 it is treated as a response variable 258 :type x Vars, yVars: list222 :type x_vars, y_vars: list 259 223 260 224 """ 261 225 domain = table.domain 262 if x Vars is None and yVars is None:226 if x_vars is None and y_vars is None: 263 227 # Response variables are defined in the table. 264 228 label_mask = data_label_mask(domain) 265 229 multilabel_flag = (sum(label_mask)  (1 if domain.class_var else 0)) > 0 266 x Vars = [v for v, label in zip(domain, label_mask) if not label]267 y Vars = [v for v, label in zip(domain, label_mask) if label]268 x_table = select_attrs(table, x Vars)269 y_table = select_attrs(table, y Vars)230 x_vars = [v for v, label in zip(domain, label_mask) if not label] 231 y_vars = [v for v, label in zip(domain, label_mask) if label] 232 x_table = select_attrs(table, x_vars) 233 y_table = select_attrs(table, y_vars) 270 234 271 elif x Vars and yVars:235 elif x_vars and y_vars: 272 236 # independent and response variables are passed by the caller 273 if domain.class_var and domain.class_var not in y Vars:237 if domain.class_var and domain.class_var not in y_vars: 274 238 # if the original table contains class variable 275 # add it to the y Vars276 y Vars.append(domain.class_var)277 label_mask = [v in y Vars for v in domain.variables]239 # add it to the y_vars 240 y_vars.append(domain.class_var) 241 label_mask = [v in y_vars for v in domain.variables] 278 242 multilabel_flag = True 279 x_table = select_attrs(table, x Vars)280 y_table = select_attrs(table, y Vars)243 x_table = select_attrs(table, x_vars) 244 y_table = select_attrs(table, y_vars) 281 245 else: 282 raise ValueError("Both xVars and yVars must be defined.") 283 284 # if independent and response variables are not listed in domain 285 # if xVars is not None: 286 # for var in xVars: 287 # if table.domain[var].attributes.has_key("label"): 288 # del table.domain[var].attributes["label"] 289 # if yVars is not None: 290 # for var in yVars: 291 # table.domain[var].attributes["label"] = True 292 293 # if the original table contains class variable 294 # if table.domain.class_var is not None: 295 # oldClass = table.domain.class_var 296 # newDomain = Orange.data.Domain(table.domain.variables, 0) 297 # newDomain[oldClass].attributes["label"] = True 298 # table = Orange.data.Table(newDomain, table) 246 raise ValueError("Both x_vars and y_vars must be defined.") 299 247 300 248 # dicrete values are continuized … … 305 253 y_table = self.impute_table(y_table) 306 254 307 # Collect the new transformed xVars/yVars 308 xVars = list(x_table.domain.variables) 309 yVars = list(y_table.domain.variables) 310 311 self.domain = Orange.data.Domain(xVars + yVars, False) 312 label_mask = [False for _ in xVars] + [True for _ in yVars] 313 314 # label_mask = data_label_mask(table.domain) 315 # xy = table.toNumpy()[0] 316 # y, x = xy[:, label_mask], xy[:, ~ label_mask] 317 # self.yVars = [v for v, m in zip(self.domain.variables, label_mask) if m] 318 # self.xVars = [v for v in self.domain.variables if v not in self.yVars] 255 # Collect the new transformed x_vars/y_vars 256 x_vars = list(x_table.domain.variables) 257 y_vars = list(y_table.domain.variables) 258 259 domain = Orange.data.Domain(x_vars + y_vars, False) 260 label_mask = [False for _ in x_vars] + [True for _ in y_vars] 261 319 262 x = x_table.toNumpy()[0] 320 263 y = y_table.toNumpy()[0] 321 264 322 self.fit(x, y) 323 return PLSRegression(label_mask=label_mask, domain=self.domain, \ 324 coefs=self.coefs, muX=self.muX, muY=self.muY, \ 325 sigmaX=self.sigmaX, sigmaY=self.sigmaY, \ 326 xVars=xVars, yVars=yVars, multilabel_flag=multilabel_flag) 265 kwargs = self.fit(x, y) 266 return PLSRegression(label_mask=label_mask, domain=domain, \ 267 # coefs=self.coefs, muX=self.muX, muY=self.muY, \ 268 # sigmaX=self.sigmaX, sigmaY=self.sigmaY, \ 269 x_vars=x_vars, y_vars=y_vars, 270 multilabel_flag=multilabel_flag, **kwargs) 327 271 328 272 def fit(self, X, Y): 329 273 """ Fits all unknown parameters, i.e. 330 274 weights, scores, loadings (for x and y) and regression coefficients. 331 275 Returns a dict with all of the parameters. 276 332 277 """ 333 # copy since this will contain sthe residuals (deflated) matrices278 # copy since this will contain the residuals (deflated) matrices 334 279 335 280 X, Y = X.copy(), Y.copy() … … 340 285 341 286 # normalization of data matrices 342 X, self.muX, self.sigmaX = normalize_matrix(X)343 Y, self.muY, self.sigmaY = normalize_matrix(Y)287 X, muX, sigmaX = normalize_matrix(X) 288 Y, muY, sigmaY = normalize_matrix(Y) 344 289 # Residuals (deflated) matrices 345 290 Xk, Yk = X, Y 346 291 # Results matrices 347 self.T, self.U = zeros((n, self.nComp)), zeros((n, self.nComp))348 self.W, self.C = zeros((p, self.nComp)), zeros((q, self.nComp))349 self.P, self.Q = zeros((p, self.nComp)), zeros((q, self.nComp))292 T, U = zeros((n, self.n_comp)), zeros((n, self.n_comp)) 293 W, C = zeros((p, self.n_comp)), zeros((q, self.n_comp)) 294 P, Q = zeros((p, self.n_comp)), zeros((q, self.n_comp)) 350 295 351 296 # NIPALS over components 352 for k in xrange(self.n Comp):297 for k in xrange(self.n_comp): 353 298 # Weights estimation (inner loop) 354 299 if self.algorithm == "nipals": 355 u, v = nipals_xy(X=Xk, Y=Yk, mode=self.mode) 300 u, v = nipals_xy(X=Xk, Y=Yk, mode=self.mode, 301 max_iter=self.max_iter) 356 302 elif self.algorithm == "svd": 357 303 u, v = svd_xy(X=Xk, Y=Yk) … … 363 309 #  substract rankone approximations to obtain remainder matrix 364 310 Xk = dot(xScore, xLoadings.T) 365 if self.deflation Mode == "canonical":311 if self.deflation_mode == "canonical": 366 312 #  regress Yk's on yScore, then substract rankone approx. 367 313 yLoadings = dot(Yk.T, yScore) / dot(yScore.T, yScore) 368 314 Yk = dot(yScore, yLoadings.T) 369 if self.deflation Mode == "regression":315 if self.deflation_mode == "regression": 370 316 #  regress Yk's on xScore, then substract rankone approx. 371 317 yLoadings = dot(Yk.T, xScore) / dot(xScore.T, xScore) 372 318 Yk = dot(xScore, yLoadings.T) 373 319 # Store weights, scores and loadings 374 self.T[:, k] = xScore.ravel() # xscores375 self.U[:, k] = yScore.ravel() # yscores376 self.W[:, k] = u.ravel() # xweights377 self.C[:, k] = v.ravel() # yweights378 self.P[:, k] = xLoadings.ravel() # xloadings379 self.Q[:, k] = yLoadings.ravel() # yloadings320 T[:, k] = xScore.ravel() # xscores 321 U[:, k] = yScore.ravel() # yscores 322 W[:, k] = u.ravel() # xweights 323 C[:, k] = v.ravel() # yweights 324 P[:, k] = xLoadings.ravel() # xloadings 325 Q[:, k] = yLoadings.ravel() # yloadings 380 326 # X = TP' + E and Y = UQ' + E 381 327 … … 383 329 # T = X W(P'W)^1 = XW* (W* : p x k matrix) 384 330 # U = Y C(Q'C)^1 = YC* (W* : q x k matrix) 385 self.xRotations = dot(self.W, 386 pinv(dot(self.P.T, self.W))) 331 xRotations = dot(W, pinv(dot(P.T, W))) 387 332 if Y.shape[1] > 1: 388 self.yRotations = dot(self.C, 389 pinv(dot(self.Q.T, self.C))) 333 yRotations = dot(C, pinv(dot(Q.T, C))) 390 334 else: 391 self.yRotations = numpy.ones(1)392 393 if self.deflationMode == "regression":335 yRotations = numpy.ones(1) 336 337 if True or self.deflation_mode == "regression": 394 338 # Estimate regression coefficient 395 339 # Y = TQ' + E = X W(P'W)^1Q' + E = XB + E 396 340 # => B = W*Q' (p x q) 397 self.coefs = dot(self.xRotations, self.Q.T) 398 self.coefs = 1. / self.sigmaX.reshape((p, 1)) * \ 399 self.coefs * self.sigmaY 400 return self 401 402 """ 403 def transform(self, X, Y=None): 404 405 # Normalize 406 Xc = (X  self.muX) / self.sigmaX 407 if Y is not None: 408 Yc = (Y  self.muY) / self.sigmaY 409 # Apply rotation 410 xScores = dot(Xc, self.xRotations) 411 if Y is not None: 412 yScores = dot(Yc, self.yRotations) 413 return xScores, yScores 414 415 return xScores 416 """ 417 341 coefs = dot(xRotations, Q.T) 342 coefs = 1. / sigmaX.reshape((p, 1)) * \ 343 coefs * sigmaY 344 345 return {"mu_x": muX, "mu_y": muY, "sigma_x": sigmaX, 346 "sigma_y": sigmaY, "T": T, "U":U, "W":U, 347 "C": C, "P":P, "Q":Q, "x_rotations": xRotations, 348 "y_rotations": yRotations, "coefs": coefs} 349 350 deprecated_members({"nComp": "n_comp", 351 "deflationMode": "deflation_mode", 352 "maxIter": "max_iter"}, 353 wrap_methods=["__init__"], 354 in_place=True)(PLSRegressionLearner) 418 355 419 356 class PLSRegression(Orange.classification.Classifier): … … 421 358 based on the values of independent variables. 422 359 360 Basic notations: 361 n  number of data instances 362 p  number of independent variables 363 q  number of reponse variables 364 365 .. attribute:: T 366 367 A n x n_comp numpy array of xscores 368 369 .. attribute:: U 370 371 A n x n_comp numpy array of yscores 372 373 .. attribute:: W 374 375 A p x n_comp numpy array of xweights 376 377 .. attribute:: C 378 379 A q x n_comp numpy array of yweights 380 381 .. attribute:: P 382 383 A p x n_comp numpy array of xloadings 384 385 .. attribute:: Q 386 387 A q x n_comp numpy array of yloading 388 389 .. attribute:: coefs 390 391 A p x q numpy array coefficients 392 of the linear model: Y = X coefs + E 393 394 .. attribute:: x_vars 395 396 list of independent variables 397 398 .. attribute:: y_vars 399 400 list of response variables 401 423 402 """ 424 403 def __init__(self, label_mask=None, domain=None, \ 425 coefs=None, mu X=None, muY=None, sigmaX=None, sigmaY=None, \426 x Vars=None, yVars=None, multilabel_flag=0):404 coefs=None, mu_x=None, mu_y=None, sigma_x=None, sigma_y=None, \ 405 x_vars=None, y_vars=None, multilabel_flag=0, **kwargs): 427 406 self.label_mask = label_mask 428 407 self.domain = domain 429 408 self.coefs = coefs 430 self.mu X, self.muY = muX, muY431 self.sigma X, self.sigmaY = sigmaX, sigmaY432 self.x Vars, self.yVars = xVars, yVars409 self.mu_x, self.mu_y = mu_x, mu_y 410 self.sigma_x, self.sigma_y = sigma_x, sigma_y 411 self.x_vars, self.y_vars = x_vars, y_vars 433 412 self.multilabel_flag = multilabel_flag 434 if not multilabel_flag: 435 self.class_var = yVars[0] 413 if not multilabel_flag and y_vars: 414 self.class_var = y_vars[0] 415 416 for name, val in kwargs.items(): 417 setattr(self, name, val) 436 418 437 419 def __call__(self, instance, result_type=Orange.core.GetValue): … … 443 425 """ 444 426 instance = Orange.data.Instance(self.domain, instance) 445 ins = [instance[v].native() for v in self.x Vars]427 ins = [instance[v].native() for v in self.x_vars] 446 428 447 429 if "?" in ins: # missing value > corresponding coefficient omitted … … 449 431 ins = map(miss_2_0, ins) 450 432 ins = numpy.array(ins) 451 xc = (ins  self.mu X) / self.sigmaX452 predicted = dot(xc, self.coefs) * self.sigma Y + self.muY453 y_hat = [var(val) for var, val in zip(self.y Vars, predicted)]433 xc = (ins  self.mu_x) / self.sigma_x 434 predicted = dot(xc, self.coefs) * self.sigma_y + self.mu_y 435 y_hat = [var(val) for var, val in zip(self.y_vars, predicted)] 454 436 if result_type == Orange.core.GetValue: 455 437 return y_hat if self.multilabel_flag else y_hat[0] … … 457 439 from Orange.statistics.distribution import Distribution 458 440 probs = [] 459 for var, val in zip(self.y Vars, y_hat):441 for var, val in zip(self.y_vars, y_hat): 460 442 dist = Distribution(var) 461 443 dist[val] = 1.0 … … 469 451 """ Prettyprints the coefficient of the PLS regression model. 470 452 """ 471 x Vars, yVars = [x.name for x in self.xVars], [y.name for y in self.yVars]472 print " " * 7 + "%6s " * len(y Vars) % tuple(yVars)473 fmt = "%6s " + "%5.3f " * len(y Vars)453 x_vars, y_vars = [x.name for x in self.x_vars], [y.name for y in self.y_vars] 454 print " " * 7 + "%6s " * len(y_vars) % tuple(y_vars) 455 fmt = "%6s " + "%5.3f " * len(y_vars) 474 456 for i, coef in enumerate(self.coefs): 475 print fmt % tuple([xVars[i]] + list(coef)) 476 477 457 print fmt % tuple([x_vars[i]] + list(coef)) 458 459 """ 460 def transform(self, X, Y=None): 461 462 # Normalize 463 Xc = (X  self.muX) / self.sigmaX 464 if Y is not None: 465 Yc = (Y  self.muY) / self.sigmaY 466 # Apply rotation 467 xScores = dot(Xc, self.xRotations) 468 if Y is not None: 469 yScores = dot(Yc, self.yRotations) 470 return xScores, yScores 471 472 return xScores 473 """ 474 475 deprecated_members({"xVars": "x_vars", 476 "yVars": "y_vars", 477 "muX": "mu_x", 478 "muY": "mu_y", 479 "sigmaX": "sigma_x", 480 "sigmaY": "sigma_y"}, 481 wrap_methods=["__init__"], 482 in_place=True)(PLSRegression) 483 478 484 if __name__ == "__main__": 479 485 … … 487 493 y = [var for var in table.domain if var.name[0]=="Y"] 488 494 print x, y 489 # c = l(table, x Vars=x, yVars=y)495 # c = l(table, x_vars=x, y_vars=y) 490 496 c = l(table) 491 497 c.print_pls_regression_coefficients()
Note: See TracChangeset
for help on using the changeset viewer.