Changeset 10645:a9cffa7f948c in orange for Orange/projection/linear.py
 Timestamp:
 03/26/12 16:37:25 (2 years ago)
 Branch:
 default
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

Orange/projection/linear.py
r10644 r10645
  163   163
  164   164   return Projector(input_domain=domain,
  165       - mean=Xm,
  166       - stdev=stdev,
        165 + center=Xm,
        166 + scale=stdev,
  167   167   standardize=False,
  168   168   projection=U)
    …     …
  933   933   if method == DR_PCA:
  934   934   pca = Pca(standardize=False, max_components=ncomps,
  935       - use_generalized_eigenvectors=False)
        935 + use_generalized_eigenvectors=False, ddof=0)
  936   936   domain = data.Domain([feature.Continuous("g%d" % i) for i
  937   937   in xrange(len(data_matrix))], False)
    …     …
  940   940   elif method == DR_SPCA and self.graph.data_has_class:
  941   941   pca = Spca(standardize=False, max_components=ncomps,
  942       - use_generalized_eigenvectors=self.use_generalized_eigenvectors)
        942 + use_generalized_eigenvectors=self.use_generalized_eigenvectors, ddof=0)
  943   943   domain = data.Domain([feature.Continuous("g%d" % i) for i
  944   944   in xrange(len(data_matrix))], feature.Continuous("c"))
    …     …
 1209  1209   #: Array containing means of each variable in the data set that was used
 1210  1210   #: to construct the projection.
 1211       - mean = numpy.array(())
       1211 + center = numpy.array(())
 1212  1212
 1213  1213   #: An array containing standard deviations of each variable in the data
 1214  1214   #: set that was used to construct the projection.
 1215       - stdev = numpy.array(())
       1215 + scale = numpy.array(())
 1216  1216
 1217  1217   #: True, if standardization was used when constructing the projection. If
    …     …
 1238  1238   def _project_single(self, example, return_what, new_feature, feature_idx):
 1239  1239   ex = Orange.data.Table([example]).to_numpy("a")[0]
 1240       - ex -= self.mean
       1240 + ex -= self.center
 1241  1241   if self.standardize:
 1242       - ex /= self.stdev
       1242 + ex /= self.scale
 1243  1243   return new_feature(numpy.dot(self.projection[feature_idx, :], ex.T)[0])
 1244  1244
    …     …
 1258  1258
 1259  1259   X, = dataset.to_numpy_MA("a")
 1260       - Xm, U = self.mean, self.projection
       1260 + Xm, U = self.center, self.projection
 1261  1261   n, m = X.shape
 1262  1262
    …     …
 1267  1267
 1268  1268   if self.standardize:
 1269       - Xd /= self.stdev
       1269 + Xd /= self.scale
 1270  1270
 1271  1271   self.A = numpy.dot(Xd, U.T)
    …     …
 1295  1295   """
 1296  1296
       1297 + #: Delta degrees of freedom used for numpy operations.
       1298 + #: 1 means normalization with (N-1) in cov and std operations
       1299 + ddof = 1
       1300 +
 1297  1301   def __new__(cls, dataset=None, **kwds):
 1298  1302   optimizer = object.__new__(cls)
    …     …
 1340  1344
 1341  1345   return PcaProjector(input_domain=dataset.domain,
 1342       - mean=mean,
 1343       - stdev=stdev,
       1346 + center=mean,
       1347 + scale=stdev,
 1344  1348   standardize=self.standardize,
 1345       - eigen_vectors=components,
 1346  1349   projection=components,
 1347       - eigen_values=variances,
       1350 + variances=variances,
 1348  1351   variance_sum=variance_sum)
 1349  1352
    …     …
 1360  1363
 1361  1364   #take care of the constant features
 1362       - stdev = numpy.std(Xd, axis=0)
       1365 + stdev = numpy.std(Xd, axis=0, ddof=self.ddof)
 1363  1366   stdev[stdev == 0] = 1. # to prevent division by zero
 1364  1367   relevant_features = stdev != 0
    …     …
 1371  1374   n, m = Xd.shape
 1372  1375   if n < m:
 1373       - C = numpy.dot(Xg.T, Xd.T)
       1376 + C = numpy.dot(Xg.T, Xd.T) / (m - self.ddof)
 1374  1377   V, D, T = numpy.linalg.svd(C)
 1375       - U = numpy.dot(V.T, Xd) / numpy.sqrt(D.reshape(-1, 1))
       1378 + U = numpy.dot(V.T, Xd) / numpy.sqrt(D.reshape(-1, 1) * (m - self.ddof))
 1376  1379   else:
 1377       - C = numpy.dot(Xg, Xd)
       1380 + C = numpy.dot(Xg, Xd) / (n - self.ddof)
 1378  1381   U, D, T = numpy.linalg.svd(C)
 1379  1382   U = U.T # eigenvectors are now in rows
    …     …
 1404  1407
 1405  1408
 1406       - class Spca(Pca):
       1409 + class Spca(PCA):
 1407  1410   def _perform_pca(self, dataset, Xd, Xg):
 1408  1411   # define the Laplacian matrix
    …     …
 1418  1421   @deprecated_members({"pc_domain": "output_domain"})
 1419  1422   class PcaProjector(Projector):
 1420       - #: Synonymous for :obj:`~Orange.projection.linear.Projector.projection`.
 1421       - eigen_vectors = numpy.array(()).reshape(0, 0)
 1422       -
 1423       - #: Array containing standard deviations of principal components.
 1424       - eigen_values = numpy.array(())
       1423 + #: Array containing variances of principal components.
       1424 + variances = numpy.array(())
 1425  1425
 1426  1426   #: Sum of all variances in the data set that was used to construct the PCA space.
    …     …
 1430  1430   ncomponents = 10
 1431  1431   s = self.variance_sum
 1432       - cs = numpy.cumsum(self.eigen_values) / s
       1432 + cs = numpy.cumsum(self.variances) / s
       1433 + stdev = numpy.sqrt(self.variances)
 1433  1434   return "\n".join([
 1434  1435   "PCA SUMMARY",
    …     …
 1438  1439   ["%10s" % a.name for a in self.output_domain.attributes]),
 1439  1440   " ".join(["Std. deviation"] +
 1440       - ["%10.3f" % a for a in self.eigen_values]),
       1441 + ["%10.3f" % a for a in stdev]),
 1441  1442   " ".join(["Proportion Var"] +
 1442       - ["%10.3f" % a for a in self.eigen_values / s * 100]),
       1443 + ["%10.3f" % a for a in self.variances / s * 100]),
 1443  1444   " ".join(["Cumulative Var"] +
 1444  1445   ["%10.3f" % a for a in cs * 100]),
    …     …
 1454  1455   ["%10s" % self.output_domain.attributes[-1].name]),
 1455  1456   " ".join(["Std. deviation"] +
 1456       - ["%10.3f" % a for a in self.eigen_values[:ncomponents]] +
       1457 + ["%10.3f" % a for a in stdev[:ncomponents]] +
 1457  1458   ["%10s" % ""] +
 1458       - ["%10.3f" % self.eigen_values[-1]]),
       1459 + ["%10.3f" % stdev[-1]]),
 1459  1460   " ".join(["Proportion Var"] +
 1460       - ["%10.3f" % a for a in self.eigen_values[:ncomponents] / s * 100] +
       1461 + ["%10.3f" % a for a in self.variances[:ncomponents] / s * 100] +
 1461  1462   ["%10s" % ""] +
 1462       - ["%10.3f" % (self.eigen_values[-1] / s * 100)]),
       1463 + ["%10.3f" % (self.variances[-1] / s * 100)]),
 1463  1464   " ".join(["Cumulative Var"] +
 1464  1465   ["%10.3f" % a for a in cs[:ncomponents] * 100] +
    …     …
 1482  1483
 1483  1484   s = self.variance_sum
 1484       - vc = self.eigen_values / s
 1485       - cs = numpy.cumsum(self.eigen_values) / s
       1485 + vc = self.variances / s
       1486 + cs = numpy.cumsum(self.variances) / s
 1486  1487
 1487  1488   fig = plt.figure()
 1488  1489   ax = fig.add_subplot(111)
 1489  1490
 1490       - x_axis = range(len(self.eigen_values))
       1491 + x_axis = range(len(self.variances))
 1491  1492   plt.grid(True)
 1492  1493
    …     …
 1501  1502   ax.legend(loc=0)
 1502  1503
 1503       - ax.axis([-0.5, len(self.eigen_values) - 0.5, 0, 1])
       1504 + ax.axis([-0.5, len(self.variances) - 0.5, 0, 1])
 1504  1505
 1505  1506   if filename:
    …     …
 1526  1527   raise ValueError, 'Two components are needed for biplot'
 1527  1528
 1528       - if not (0 <= min(components) <= max(components) < len(self.eigen_values)):
       1529 + if not (0 <= min(components) <= max(components) < len(self.variances)):
 1529  1530   raise ValueError, 'Invalid components'
 1530  1531
    …     …
 1532  1533   Y = self.A[:, components[1]]
 1533  1534
 1534       - vectorsX = self.eigen_vectors[:, components[0]]
 1535       - vectorsY = self.eigen_vectors[:, components[1]]
 1536       -
 1537       - max_load_value = self.eigen_vectors.max() * 1.5
       1535 + vectorsX = self.variances[:, components[0]]
       1536 + vectorsY = self.variances[:, components[1]]
       1537 +
       1538 + max_load_value = self.variances.max() * 1.5
 1538  1539
 1539  1540   fig = plt.figure()
 1540  1541   ax1 = fig.add_subplot(111)
 1541  1542   ax1.set_title(title + "\n")
 1542       - ax1.set_xlabel("PC%s (%d%%)" % (components[0], self.eigen_values[components[0]] / self.variance_sum * 100))
 1543       - ax1.set_ylabel("PC%s (%d%%)" % (components[1], self.eigen_values[components[1]] / self.variance_sum * 100))
       1543 + ax1.set_xlabel("PC%s (%d%%)" % (components[0], self.variances[components[0]] / self.variance_sum * 100))
       1544 + ax1.set_ylabel("PC%s (%d%%)" % (components[1], self.variances[components[1]] / self.variance_sum * 100))
 1544  1545   ax1.xaxis.set_label_position('bottom')
 1545  1546   ax1.xaxis.set_ticks_position('bottom')
    …     …
 1668  1669   return FdaProjector(input_domain=dataset.domain,
 1669  1670   output_domain=out_domain,
 1670       - mean=Xm,
 1671       - stdev=stdev,
       1671 + center=Xm,
       1672 + scale=stdev,
 1672  1673   standardize=True,
 1673  1674   eigen_vectors=U,
Note: See TracChangeset
for help on using the changeset viewer.