#
source:
orange/Orange/OrangeWidgets/Unsupervised/OWPCA.py
@
11217:adbdaf6efe02

Revision 11217:adbdaf6efe02, 16.4 KB checked in by Ales Erjavec <ales.erjavec@…>, 17 months ago (diff) |
---|

Rev | Line | |
---|---|---|

[10798] | 1 | """ |

2 | <name>PCA</name> | |

[10801] | 3 | <description>Perform Principal Component Analysis</description> |

4 | <contact>ales.erjavec(@ at @)fri.uni-lj.si</contact> | |

[11217] | 5 | <icon>icons/PCA.svg</icon> |

[10801] | 6 | <tags>pca,principal,component,projection</tags> |

[10835] | 7 | <priority>3050</priority> |

[10798] | 8 | |

9 | """ | |

10 | import Orange | |

11 | import Orange.utils.addons | |

12 | ||

13 | from OWWidget import * | |

14 | import OWGUI | |

15 | ||

16 | import Orange | |

17 | import Orange.projection.linear as plinear | |

18 | ||

19 | import numpy as np | |

20 | import sys | |

21 | ||

22 | from plot.owplot import OWPlot | |

23 | from plot.owcurve import OWCurve | |

24 | from plot import owaxis | |

25 | ||

[10832] | 26 | |

class ScreePlot(OWPlot):
    """Plot with a vertical cutoff line that can be moved with the mouse.

    The cutoff line is a two-point :class:`CutoffCurve` spanning y in
    [0.0, 1.0].  Whenever the user moves it, the ``cutoff_moved(double)``
    signal is emitted with the new x position (in graph coordinates).
    """

    def __init__(self, parent=None, name="Scree Plot"):
        OWPlot.__init__(self, parent, name=name)
        # The cutoff line starts hidden at x == 0; it is shown through
        # `set_cutoff_curve_enabled`.
        self.cutoff_curve = CutoffCurve(
            [0.0, 0.0], [0.0, 1.0],
            x_axis_key=owaxis.xBottom, y_axis_key=owaxis.yLeft)
        self.cutoff_curve.setVisible(False)
        self.cutoff_curve.set_style(OWCurve.Lines)
        self.add_custom_curve(self.cutoff_curve)

    def is_cutoff_enabled(self):
        """Return a true value if the cutoff curve exists and is visible."""
        return self.cutoff_curve and self.cutoff_curve.isVisible()

    def set_cutoff_curve_enabled(self, state):
        """Show (`state` true) or hide (`state` false) the cutoff curve."""
        self.cutoff_curve.setVisible(state)

    def set_cutoff_value(self, value):
        """Move the cutoff line to `value`, clamped to the x axis range."""
        xmin, xmax = self.x_scale()
        x = min(max(value, xmin), xmax)
        self.cutoff_curve.set_data([x, x], [0.0, 1.0])

    def _drag_cutoff_to_event(self, event, tolerance):
        """Move the cutoff line to the x position of a mouse `event`.

        The move only happens when the position lies within `tolerance`
        (graph units) of the x axis range; the position is then clamped
        to the range and `cutoff_moved(double)` is emitted.
        """
        pos = self.mapToScene(event.pos())
        x, _ = self.map_from_graph(pos)
        xmin, xmax = self.x_scale()
        if xmin - tolerance <= x <= xmax + tolerance:
            x = min(max(x, xmin), xmax)
            self.cutoff_curve.set_data([x, x], [0.0, 1.0])
            self.emit_cutoff_moved(x)

    def mousePressEvent(self, event):
        """Move the cutoff line on a left-button press inside the plot."""
        if self.isLegendEvent(event, QGraphicsView.mousePressEvent):
            return

        if self.is_cutoff_enabled() and event.buttons() & Qt.LeftButton:
            self._drag_cutoff_to_event(event, 0.1)
        # Deliberately dispatch to QGraphicsView, not OWPlot.
        return QGraphicsView.mousePressEvent(self, event)

    def mouseMoveEvent(self, event):
        """Drag the cutoff line, or update the cursor when hovering it."""
        if self.isLegendEvent(event, QGraphicsView.mouseMoveEvent):
            return

        if self.is_cutoff_enabled() and event.buttons() & Qt.LeftButton:
            self._drag_cutoff_to_event(event, 0.5)
        elif self.is_cutoff_enabled() and \
                self.is_pos_over_cutoff_line(event.pos()):
            self.setCursor(Qt.SizeHorCursor)
        else:
            self.setCursor(Qt.ArrowCursor)

        return QGraphicsView.mouseMoveEvent(self, event)

    def mouseReleaseEvent(self, event):
        """Forward the release event straight to QGraphicsView.

        This method used to be spelled `mouseReleaseEvene`, so it never
        overrode the Qt handler, unlike the press and move handlers
        above which both dispatch directly to QGraphicsView.
        """
        return QGraphicsView.mouseReleaseEvent(self, event)

    # Old (misspelled) name kept so existing references keep working.
    mouseReleaseEvene = mouseReleaseEvent

    def x_scale(self):
        """Return the ``(min, max)`` extent of the bottom x axis.

        Uses the axis labels when set (``0 .. len(labels) - 1``),
        otherwise the first two entries of the axis scale.

        Raises ValueError if the axis has neither labels nor a scale.
        """
        ax = self.axes[owaxis.xBottom]
        if ax.labels:
            return 0, len(ax.labels) - 1
        elif ax.scale:
            return ax.scale[0], ax.scale[1]
        else:
            raise ValueError("The bottom x axis has neither labels "
                             "nor a scale.")

    def emit_cutoff_moved(self, x):
        """Emit the ``cutoff_moved(double)`` signal with position `x`."""
        self.emit(SIGNAL("cutoff_moved(double)"), x)

    def set_axis_labels(self, *args):
        """Set the axis labels and refresh the cached axes transform."""
        OWPlot.set_axis_labels(self, *args)
        self.map_transform = self.transform_for_axes()

    def is_pos_over_cutoff_line(self, pos):
        """Return True if `pos` is within 1.5 units of the cutoff line.

        `pos` is the position reported by the mouse event; the 1.5 unit
        margin is applied to ``pos.x()`` before it is mapped through
        `inv_transform`.  Positions whose mapped y value falls outside
        [0.0, 1.0] never match.
        """
        x1 = self.inv_transform(owaxis.xBottom, pos.x() - 1.5)
        x2 = self.inv_transform(owaxis.xBottom, pos.x() + 1.5)
        y = self.inv_transform(owaxis.yLeft, pos.y())
        if y < 0.0 or y > 1.0:
            return False
        curve_data = self.cutoff_curve.data()
        if not curve_data:
            return False
        # First point of the curve; its first coordinate is the x value.
        cutoff = curve_data[0][0]
        return x1 < cutoff and cutoff < x2

[10832] | 111 | |

class CutoffCurve(OWCurve):
    """An OWCurve that accepts hover events and uses a horizontal-resize
    cursor.
    """

    def __init__(self, *args, **kwargs):
        # All arguments are passed through to OWCurve unchanged.
        OWCurve.__init__(self, *args, **kwargs)
        self.setCursor(Qt.SizeHorCursor)
        self.setAcceptHoverEvents(True)

[10798] | 117 | |

[10832] | 118 | |

class OWPCA(OWWidget):
    """Widget that runs PCA on the input table and outputs the projection.

    The full decomposition is computed once per input (`apply`) and
    cached in `projector_full`; the "Max components" / "Variance covered"
    controls and the scree plot cutoff line then only select how many of
    the cached components are sent to the outputs (`update_components`).
    """

    settingsList = ["standardize", "max_components", "variance_covered",
                    "use_generalized_eigenvectors", "auto_commit"]

    def __init__(self, parent=None, signalManager=None, title="PCA"):
        OWWidget.__init__(self, parent, signalManager, title, wantGraph=True)

        self.inputs = [("Input Data", Orange.data.Table, self.set_data)]
        self.outputs = [("Transformed Data", Orange.data.Table, Default),
                        ("Eigen Vectors", Orange.data.Table)]

        # Defaults for the stored settings (see `settingsList`).
        self.standardize = True
        self.max_components = 0  # 0 is the special "All" value
        self.variance_covered = 100.0  # percent
        self.use_generalized_eigenvectors = False
        self.auto_commit = False

        self.loadSettings()

        self.data = None
        self.changed_flag = False

        #####
        # GUI
        #####
        grid = QGridLayout()
        box = OWGUI.widgetBox(self.controlArea, "Components Selection",
                              orientation=grid)

        label1 = QLabel("Max components", box)
        grid.addWidget(label1, 1, 0)

        sb1 = OWGUI.spin(box, self, "max_components", 0, 1000,
                         tooltip="Maximum number of components",
                         callback=self.on_update,
                         addToLayout=False,
                         keyboardTracking=False
                         )
        self.max_components_spin = sb1.control
        self.max_components_spin.setSpecialValueText("All")
        grid.addWidget(sb1.control, 1, 1)

        label2 = QLabel("Variance covered", box)
        grid.addWidget(label2, 2, 0)

        sb2 = OWGUI.doubleSpin(box, self, "variance_covered", 1.0, 100.0, 1.0,
                               tooltip="Percent of variance covered.",
                               callback=self.on_update,
                               decimals=1,
                               addToLayout=False,
                               keyboardTracking=False
                               )
        sb2.control.setSuffix("%")
        grid.addWidget(sb2.control, 2, 1)

        OWGUI.rubber(self.controlArea)

        box = OWGUI.widgetBox(self.controlArea, "Commit")
        cb = OWGUI.checkBox(box, self, "auto_commit", "Commit on any change")
        b = OWGUI.button(box, self, "Commit",
                         callback=self.update_components)
        OWGUI.setStopper(self, b, cb, "changed_flag", self.update_components)

        self.scree_plot = ScreePlot(self)
#        self.scree_plot.set_main_title("Scree Plot")
#        self.scree_plot.set_show_main_title(True)
        self.scree_plot.set_axis_title(owaxis.xBottom, "Principal Components")
        self.scree_plot.set_show_axis_title(owaxis.xBottom, 1)
        self.scree_plot.set_axis_title(owaxis.yLeft, "Proportion of Variance")
        self.scree_plot.set_show_axis_title(owaxis.yLeft, 1)

        self.variance_curve = self.scree_plot.add_curve(
            "Variance",
            Qt.red, Qt.red, 2,
            xData=[],
            yData=[],
            style=OWCurve.Lines,
            enableLegend=True,
            lineWidth=2,
            autoScale=1,
            x_axis_key=owaxis.xBottom,
            y_axis_key=owaxis.yLeft,
        )

        self.cumulative_variance_curve = self.scree_plot.add_curve(
            "Cumulative Variance",
            Qt.darkYellow, Qt.darkYellow, 2,
            xData=[],
            yData=[],
            style=OWCurve.Lines,
            enableLegend=True,
            lineWidth=2,
            autoScale=1,
            x_axis_key=owaxis.xBottom,
            y_axis_key=owaxis.yLeft,
        )

        self.mainArea.layout().addWidget(self.scree_plot)
        self.connect(self.scree_plot,
                     SIGNAL("cutoff_moved(double)"),
                     self.on_cutoff_moved
                     )

        self.connect(self.graphButton,
                     SIGNAL("clicked()"),
                     self.scree_plot.save_to_file)

        # Cached results of the full decomposition (see `clear_cached`).
        self.components = None
        self.variances = None
        # Every other method reads `variances_cumsum`; initialise it here
        # so the attribute exists before the first `clear_cached` call.
        self.variances_cumsum = None
        # Not read anywhere in this class; kept for backward
        # compatibility with code that may access the attribute.
        self.variances_sum = None
        self.projector_full = None
        self.currently_selected = 0

        self.resize(800, 400)

    def clear(self):
        """Clear widget state
        """
        self.data = None
        self.scree_plot.set_cutoff_curve_enabled(False)
        self.clear_cached()
        self.variance_curve.setVisible(False)
        self.cumulative_variance_curve.setVisible(False)

    def clear_cached(self):
        """Clear cached components
        """
        self.components = None
        self.variances = None
        self.variances_cumsum = None
        self.projector_full = None
        self.currently_selected = 0

    def set_data(self, data=None):
        """Set the widget input data.

        With `data` set the projection is recomputed; with ``None`` both
        outputs are cleared by sending ``None`` on them.
        """
        self.clear()
        if data is not None:
            self.data = data
            self.on_change()
        else:
            self.send("Transformed Data", None)
            self.send("Eigen Vectors", None)

    def on_change(self):
        """Data has changed and we need to recompute the projection.
        """
        if self.data is None:
            return
        self.clear_cached()
        self.apply()

    def on_update(self):
        """Component selection was changed by the user.
        """
        if self.data is None:
            return
        self.update_cutoff_curve()
        # Only touch the outputs if the selection really changed.
        if self.currently_selected != self.number_of_selected_components():
            self.update_components_if()

    def construct_pca_all_comp(self):
        """Return a PCA constructor that keeps all components."""
        pca = plinear.PCA(
            standardize=self.standardize,
            max_components=0,
            variance_covered=1,
            use_generalized_eigenvectors=self.use_generalized_eigenvectors
        )
        return pca

    def construct_pca(self):
        """Return a PCA constructor configured from the current controls.

        `variance_covered` is stored as a percentage and is passed on
        divided by 100.
        """
        max_components = self.max_components
        variance_covered = self.variance_covered
        pca = plinear.PCA(
            standardize=self.standardize,
            max_components=max_components,
            variance_covered=variance_covered / 100.0,
            use_generalized_eigenvectors=self.use_generalized_eigenvectors
        )
        return pca

    def apply(self):
        """Apply PCA on input data, caching the full projection,
        then updating the selected components.

        """
        pca = self.construct_pca_all_comp()
        self.projector_full = pca(self.data)

        # Normalise into a *new* array.  An in-place `/=` here would also
        # overwrite `projector_full.variances`, which `update_components`
        # and `sendReport` still read as the projector's own variances.
        variances = self.projector_full.variances
        self.variances = variances / np.sum(variances)
        self.variances_cumsum = np.cumsum(self.variances)

        self.max_components_spin.setRange(0, len(self.variances))
        # NOTE(review): the spin box range allows len(variances) while the
        # stored value is clamped to len(variances) - 1 -- confirm that
        # this difference is intended.
        self.max_components = min(self.max_components,
                                  len(self.variances) - 1)
        self.update_scree_plot()
        self.update_cutoff_curve()
        self.update_components_if()

    def update_components_if(self):
        """Commit immediately if auto commit is on, else flag the change."""
        if self.auto_commit:
            self.update_components()
        else:
            self.changed_flag = True

    def update_components(self):
        """Update the output components.
        """
        if self.data is None or self.projector_full is None:
            return

        scale = self.projector_full.scale
        center = self.projector_full.center
        components = self.projector_full.projection
        input_domain = self.projector_full.input_domain
        variances = self.projector_full.variances
        variance_sum = self.projector_full.variance_sum

        # Get selected components (based on max_components and
        # variance_covered)
        pca = self.construct_pca()
        variances, components, variance_sum = \
            pca._select_components(variances, components)

        projector = plinear.PcaProjector(input_domain=input_domain,
                                         standardize=self.standardize,
                                         scale=scale,
                                         center=center,
                                         projection=components,
                                         variances=variances,
                                         variance_sum=variance_sum)
        projected_data = projector(self.data)
        eigenvectors = self.eigenvectors_as_table(components)

        self.currently_selected = self.number_of_selected_components()

        self.send("Transformed Data", projected_data)
        self.send("Eigen Vectors", eigenvectors)

        self.changed_flag = False

    def eigenvectors_as_table(self, U):
        """Return the rows of `U` as a table with columns C1, C2, ...

        One continuous feature is created per column of `U`
        (``U.shape[1]``) and one table row per row of `U`.
        """
        features = [Orange.feature.Continuous("C%i" % i)
                    for i in range(1, U.shape[1] + 1)]
        domain = Orange.data.Domain(features, False)
        return Orange.data.Table(domain, [list(v) for v in U])

    def update_scree_plot(self):
        """Plot the variance and cumulative variance of all components."""
        x_space = np.arange(0, len(self.variances))
        self.scree_plot.set_axis_enabled(owaxis.xBottom, True)
        self.scree_plot.set_axis_enabled(owaxis.yLeft, True)
        self.scree_plot.set_axis_labels(owaxis.xBottom,
                                        ["PC" + str(i + 1) for i in x_space])

        self.variance_curve.set_data(x_space, self.variances)
        self.cumulative_variance_curve.set_data(x_space,
                                                self.variances_cumsum)
        self.variance_curve.setVisible(True)
        self.cumulative_variance_curve.setVisible(True)

        self.scree_plot.set_cutoff_curve_enabled(True)
        self.scree_plot.replot()

    def on_cutoff_moved(self, value):
        """Cutoff curve was moved by the user.
        """
        components = int(np.floor(value)) + 1
        self.max_components = components
        self.variance_covered = self.variances_cumsum[components - 1] * 100
        # Did the number of components actually change
        if self.currently_selected != self.number_of_selected_components():
            self.update_components_if()

    def _effective_max_components(self):
        """Return `max_components` with the special 0 ("All") resolved."""
        if self.max_components == 0:
            # Special "All" value
            return len(self.variances_cumsum)
        return self.max_components

    def update_cutoff_curve(self):
        """Update cutoff curve from 'Components Selection' control box.
        """
        max_components = self._effective_max_components()

        variance = self.variances_cumsum[max_components - 1] * 100.0
        if variance < self.variance_covered:
            # The component limit is reached before the variance limit.
            cutoff = float(max_components - 1)
        else:
            cutoff = np.searchsorted(self.variances_cumsum,
                                     self.variance_covered / 100.0)
        self.scree_plot.set_cutoff_value(cutoff + 0.5)

    def number_of_selected_components(self):
        """How many components are selected.
        """
        if self.data is None:
            return 0

        variance_components = np.searchsorted(self.variances_cumsum,
                                              self.variance_covered / 100.0)
        max_components = self._effective_max_components()
        return min(variance_components + 1, max_components)

    def sendReport(self):
        """Report the settings, a per-component summary table and the plot.

        The summary table and the scree plot are only reported when data
        and a cached projector are available.
        """
        self.reportSettings(
            "PCA Settings",
            [("Max. components", self.max_components),
             ("Variance covered", "%i%%" % self.variance_covered),
             ])
        if self.data is not None and self.projector_full:
            output_domain = self.projector_full.output_domain
            st_dev = np.sqrt(self.projector_full.variances)
            summary = [
                [""] + [a.name for a in output_domain.attributes],
                ["Std. deviation"] + ["%.3f" % sd for sd in st_dev],
                ["Proportion Var"] +
                ["%.3f" % v for v in self.variances * 100.0],
                ["Cumulative Var"] +
                ["%.3f" % v for v in self.variances_cumsum * 100.0]
            ]

            # Render the rows as a plain HTML table; the first row is the
            # header.
            th = "<th>%s</th>".__mod__
            header = "".join(map(th, summary[0]))
            td = "<td>%s</td>".__mod__
            summary = ["".join(map(td, row)) for row in summary[1:]]
            tr = "<tr>%s</tr>".__mod__
            summary = "\n".join(map(tr, [header] + summary))
            summary = "<table>\n%s\n</table>" % summary

            self.reportSection("Summary")
            self.reportRaw(summary)

            self.reportSection("Scree Plot")
            self.reportImage(self.scree_plot.save_to_file_direct)

448 | ||

[10832] | 449 | |

if __name__ == "__main__":
    # Manual check: show the widget stand-alone with the "iris" table.
    app = QApplication(sys.argv)
    widget = OWPCA()
    widget.set_data(Orange.data.Table("iris"))
    widget.show()
    app.exec_()

**Note:** See TracBrowser for help on using the repository browser.