Changeset 1534:013203e8fa81 in orange-bioinformatics
- Timestamp:
- 02/01/12 16:20:15 (16 months ago)
- Branch:
- default
- File:
-
- 1 edited
-
widgets/OWKEGGPathwayBrowser.py (modified) (16 diffs)
Legend:
- Unmodified
- Added
- Removed
-
widgets/OWKEGGPathwayBrowser.py
r1525 r1534 12 12 import obiTaxonomy 13 13 14 import obiKEGG2 as obiKEGG 15 import obiGeneSets 16 import orngMisc 17 14 18 import webbrowser 15 19 … … 20 24 from orngDataCaching import data_hints 21 25 26 USE_THREADING = True 27 28 if USE_THREADING: 29 import threading 30 from functools import partial 31 def threading_queued_invoke(qobj, func): 32 def safe_wrapper(*args, **kwargs): 33 QMetaObject.invokeMethod(qobj, "queuedInvoke", 34 Qt.QueuedConnection, 35 Q_ARG("PyQt_PyObject", 36 partial(func, *args, **kwargs))) 37 return safe_wrapper 38 22 39 def split_and_strip(string, sep=None): 23 40 return [s.strip() for s in string.split(sep)] … … 193 210 194 211 self.allOrganismCodes = {} 195 212 196 213 self.organismCodes = [] 197 198 self.organismComboBox = cb = OWGUI.comboBox(self.controlArea, self, "organismIndex", box="Organism", items=[], callback=self.Update, addSpace=True, debuggingEnabled=0) 199 cb.setMaximumWidth(200) 200 201 self.signalManager.freeze(self).push() #setFreeze(1) 202 QTimer.singleShot(100, self.UpdateOrganismComboBox) 214 215 box = OWGUI.widgetBox(self.controlArea, "Organism") 216 self.organismComboBox = cb = OWGUI.comboBox(box, self, 217 "organismIndex", 218 items=[], 219 callback=self.OrganismSelectionCallback, 220 addSpace=True, 221 debuggingEnabled=0) 222 OWGUI.button(box, self, "Update Pathways", callback=self.UpdateToLatestPathways) 223 224 # cb.setMaximumWidth(200) 225 226 self.signalManager.freeze(self).push() 203 227 204 228 box = OWGUI.widgetBox(self.controlArea, "Gene attribute") 205 self.geneAttrCombo = OWGUI.comboBox(box, self, "geneAttrIndex", callback=self.Update) 206 OWGUI.checkBox(box, self, "useAttrNames", "Use variable names", disables=[(-1, self.geneAttrCombo)], callback=self.UseAttrNamesCallback) 229 self.geneAttrCombo = OWGUI.comboBox(box, self, "geneAttrIndex", 230 callback=self.Update) 231 232 OWGUI.checkBox(box, self, "useAttrNames", 233 "Use variable names", 234 disables=[(-1, self.geneAttrCombo)], 235 callback=self.UseAttrNamesCallback) 236 207 237 self.geneAttrCombo.setDisabled(bool(self.useAttrNames)) 208 238 209 # OWGUI.checkBox(box, self, "caseSensitive", "Case sensitive gene matching", callback=self.Update)210 239 OWGUI.separator(self.controlArea) 211 240 212 OWGUI.checkBox(self.controlArea, self, "useReference", "From signal", box="Reference", callback=self.Update) 241 OWGUI.checkBox(self.controlArea, self, "useReference", 242 "From signal", 243 box="Reference", 244 callback=self.Update) 245 213 246 OWGUI.separator(self.controlArea) 214 247 215 OWGUI.checkBox(self.controlArea, self, "showOrthology", "Show pathways in full orthology", box="Orthology", callback=self.UpdateListView) 216 217 OWGUI.checkBox(self.controlArea, self, "autoResize", "Resize to fit", box="Image", callback=lambda :self.pathwayView.updateTransform()) 248 OWGUI.checkBox(self.controlArea, self, "showOrthology", 249 "Show pathways in full orthology", 250 box="Orthology", 251 callback=self.UpdateListView) 252 253 OWGUI.checkBox(self.controlArea, self, "autoResize", 254 "Resize to fit", 255 box="Image", 256 callback=self.UpdatePathwayViewTransform) 257 218 258 OWGUI.separator(self.controlArea) 219 259 … … 232 272 self.listView.setAllColumnsShowFocus(1) 233 273 self.listView.setColumnCount(4) 234 self.listView.setHeaderLabels(["Pathway", "P value", "Genes", "Reference"]) 274 self.listView.setHeaderLabels(["Pathway", "P value", 275 "Genes", "Reference"]) 235 276 236 277 self.listView.setSelectionMode(QAbstractItemView.SingleSelection) 237 278 238 279 self.listView.setSortingEnabled(True) 239 #self.listView.setAllColumnsShowFocus(1)280 240 281 self.listView.setMaximumHeight(200) 241 282 242 self.connect(self.listView, SIGNAL("itemSelectionChanged()"), self.UpdatePathwayView) 243 244 self.connect(self.graphButton, SIGNAL("clicked()"), self.saveGraph) 283 self.connect(self.listView, 284 SIGNAL("itemSelectionChanged()"), 285 self.UpdatePathwayView) 286 287 self.connect(self.graphButton, 288 SIGNAL("clicked()"), 289 self.saveGraph) 245 290 246 291 self.ctrlPressed = False … … 248 293 self.data = None 249 294 self.refData = None 250 self.loadedOrganism = None251 295 252 296 self.resize(800, 600) 253 297 254 self.connect(self, SIGNAL("widgetStateChanged(QString, int, QString)"), self.onStateChange) 298 self.connect(self, 299 SIGNAL("widgetStateChanged(QString, int, QString)"), 300 self.onStateChange) 301 302 self.has_new_data = False 303 self.has_new_reference_set = False 304 305 self.setEnabled(False) 306 QTimer.singleShot(100, self.UpdateOrganismComboBox) 307 255 308 256 309 def UpdateOrganismComboBox(self): 257 310 try: 258 self.progressBarInit() 259 with orngServerFiles.DownloadProgress.setredirect(self.progressBarSet): 260 genome = obiKEGG.KEGGGenome() 261 self.progressBarFinished() 311 genome = obiKEGG.KEGGGenome() 312 all_codes = list(genome) 262 313 263 314 self.allOrganismCodes = genome 264 315 265 316 essential = genome.essential_organisms() 266 267 local = [name.split(".")[0].split("_")[-1] for name in orngServerFiles.listfiles("KEGG") if "kegg_genes" in name] 268 self.organismCodes = [(code, organism.definition) for code, organism in self.allOrganismCodes.items() if code in local or code in essential] 269 self.organismCodes.sort() 317 common = genome.common_organisms() 318 common = [c for c in common if c not in essential] 319 320 self.infoLabel.setText("Fetching organism definitions\n") 321 322 pb = OWGUI.ProgressBar(self, len(essential + common)) 323 codes = [] 324 for i, code in enumerate(essential + common): 325 codes.append((code, genome[code].definition)) 326 pb.advance() 327 pb.finish() 328 self.organismCodes = codes 329 270 330 items = [desc for code, desc in self.organismCodes] 331 271 332 self.organismCodes = [code for code, desc in self.organismCodes] 272 333 334 # TODO: Add option to specify additional organisms not 335 # in the common list. 336 273 337 self.organismComboBox.addItems(items) 274 338 finally: 339 self.setEnabled(True) 340 self.infoLabel.setText("No data on input\n") 275 341 self.signalManager.freeze(self).pop() #setFreeze(0) 276 342 343 def Clear(self): 344 self.infoLabel.setText("No data on input\n") 345 self.listView.clear() 346 self.ClearPathway() 347 348 self.send("Selected Examples", None) 349 self.send("Unselected Examples", None) 350 351 def ClearPathway(self): 352 self.pathwayView.SetPathway(None) 353 self.selectedObjects = defaultdict(list) 277 354 278 355 def SetData(self, data=None): 279 if not self.organismCodes: ## delay this call until we retrieve organism codes from the server files280 QTimer.singleShot(200, lambda: self.SetData(data))281 return282 283 356 self.closeContext() 284 357 self.data = data 285 358 self.warning(0) 286 if data :359 if data is not None: 287 360 self.SetGeneAttrCombo() 288 361 taxid = data_hints.get_hint(data, "taxid", None) … … 292 365 self.organismIndex = self.organismCodes.index(code) 293 366 except Exception, ex: 367 # self.log.exception(self, level=self.log.INFO) 294 368 print ex, taxid 295 369 … … 297 371 298 372 self.openContext("", data) 299 self.Update() 300 else: 301 self.infoLabel.setText("No data on input\n") 302 self.listView.clear() 303 self.selectedObjects = defaultdict(list) 304 self.pathwayView.SetPathway(None) 305 self.send("Selected Examples", None) 306 self.send("Unselected Examples", None) 373 else: 374 self.Clear() 375 376 self.has_new_data = True 307 377 308 378 def SetRefData(self, data=None): 309 379 self.refData = data 310 if self.useReference and self.data and self.organismCodes: 380 self.has_new_reference_set = True 381 382 def handleNewSignals(self): 383 if self.has_new_data or (self.has_new_reference_set and \ 384 self.useReference): 311 385 self.Update() 312 386 387 self.has_new_data = False 388 self.has_new_reference_set = False 389 313 390 def UseAttrNamesCallback(self): 314 ## self.geneAttrCombo.setDisabled(bool(self.useAttrNames))315 391 self.Update() 316 392 393 def OrganismSelectionCallback(self): 394 self.Update() 395 317 396 def SetGeneAttrCombo(self): 318 self.geneAttrCandidates = self.data.domain.attributes + self.data.domain.getmetas().values() 319 self.geneAttrCandidates = filter(lambda v:v.varType in [orange.VarTypes.Discrete ,orange.VarTypes.String], self.geneAttrCandidates) 397 self.geneAttrCandidates = self.data.domain.attributes + \ 398 self.data.domain.getmetas().values() 399 self.geneAttrCandidates = filter(lambda v:v.varType in [orange.VarTypes.Discrete, 400 orange.VarTypes.String], 401 self.geneAttrCandidates) 320 402 self.geneAttrCombo.clear() 321 #print 'geneAttrCandidates', self.geneAttrCandidates403 322 404 self.geneAttrCombo.addItems([var.name for var in self.geneAttrCandidates]) 323 return324 325 def PreDownload(self, org=None, pb=None):326 pb, finish = (OWGUI.ProgressBar(self, 0), True) if pb is None else (pb, False)327 files = ["kegg_brite.tar.gz", "kegg_pathways_map.tar.gz", "kegg_genome.tar.gz"]328 if org:329 files += ["kegg_genes_%s.tar.gz" % org, "kegg_pathways_%s.tar.gz" % org]330 files = [file for file in files if file not in orngServerFiles.listfiles("KEGG")]331 pb.iter += len(files) * 100332 for i, filename in enumerate(files):333 # print filename334 orngServerFiles.download("KEGG", filename, callback=pb.advance)335 if finish:336 pb.finish()337 405 338 406 def UpdateListView(self): … … 343 411 allPathways = self.org.pathways() 344 412 allRefPathways = obiKEGG.pathways("map") 345 self.progressBarFinished()413 # self.progressBarFinished() 346 414 items = [] 415 kegg_pathways = obiKEGG.KEGGPathways() 416 347 417 if self.showOrthology: 348 418 self.koOrthology = obiKEGG.KEGGBrite("ko00001") … … 360 430 item = QTreeWidgetItem(lvItem) 361 431 id = "path:"+self.organismCodes[min(self.organismIndex, len(self.organismCodes)-1)] + koEntry.title[:5] 432 p = kegg_pathways.get_entry(id) 362 433 if koEntry.title[:5] in path_ids: 434 if p is None: 435 # In case the genesets still have obsolete entries 436 name = koEntry.title 437 else: 438 name = p.name 363 439 genes, p_value, ref = self.pathways[id] 364 item.setText(0, obiKEGG.KEGGPathway(id).title) 365 # print id, obiKEGG.KEGGPathway(id).title 440 item.setText(0, name) 366 441 item.setText(1, "%.5f" % p_value) 367 442 item.setText(2, "%i of %i" %(len(genes), len(self.genes))) 368 443 item.setText(3, "%i of %i" %(ref, len(self.referenceGenes))) 369 item.pathway_id = id 444 item.pathway_id = id if p is not None else None 370 445 else: 371 item.setText(0, obiKEGG.KEGGPathway(id).title if id in allPathways else koEntry.title)446 item.setText(0, p.name if id in allPathways else koEntry.title) 372 447 if id in allPathways: 373 448 item.pathway_id = id … … 392 467 for id, (genes, p_value, ref) in pathways: 393 468 item = QTreeWidgetItem(self.listView) 394 item.setText(0, obiKEGG.KEGGPathway(id).title)469 item.setText(0, kegg_pathways.get_entry(id).name) 395 470 item.setText(1, "%.5f" % p_value) 396 471 item.setText(2, "%i of %i" %(len(genes), len(self.genes))) … … 421 496 self.pathwayView.SetPathway(None) 422 497 return 423 self.pathway = obiKEGG.KEGGPathway(item.pathway_id) 498 499 if USE_THREADING: 500 result = {} 501 def call(pathway_id): 502 result["pathway"] = p = obiKEGG.KEGGPathway(pathway_id) 503 p.title # makes sure the kgml file is downloaded 504 p.image 505 p.get_image() # makes sure the image is downloaded 506 507 self.setEnabled(False) 508 try: 509 thread = threading.Thread(None, 510 call, 511 name="get_kgml_and_image", 512 args=(item.pathway_id,)) 513 thread.start() 514 while thread.is_alive(): 515 thread.join(timeout=0.025) 516 qApp.processEvents() 517 finally: 518 self.setEnabled(True) 519 if "pathway" in result: 520 self.pathway = result["pathway"] 521 else: 522 raise Exception("Could not get kgml and pathway image") 523 else: 524 self.pathway = obiKEGG.KEGGPathway(item.pathway_id) 525 424 526 self.pathwayView.SetPathway(self.pathway, self.pathways.get(item.pathway_id, [[]])[0]) 425 527 426 528 def UpdatePathwayViewTransform(self): 529 self.pathwayView.updateTransform() 530 427 531 def Update(self): 428 532 if not self.data: … … 438 542 if any("," in gene for gene in genes): 439 543 genes = reduce(list.__add__, (split_and_strip(gene, ",") for gene in genes), []) 440 self.information(0, "Separators detected in input gene names. Assuming multiple genes per example.")441 else: 442 self.error(0, "Cannot ext act gene names from input")544 self.information(0, "Separators detected in input gene names. Assuming multiple genes per instance.") 545 else: 546 self.error(0, "Cannot extract gene names from input") 443 547 genes = [] 444 548 org_code = self.organismCodes[min(self.organismIndex, len(self.organismCodes)-1)] 445 if self.loadedOrganism != org_code: 446 self.PreDownload(org_code, pb=pb) 447 self.org = obiKEGG.KEGGOrganism(org_code) 448 self.loadedOrganism = org_code 449 uniqueGenes, conflicting, unknown = self.org.get_unique_gene_ids(set(genes), self.caseSensitive) 549 550 if USE_THREADING: 551 result = {} 552 def callable(*args, **kwargs): 553 result["org"] = org = obiKEGG.KEGGOrganism(org_code) 554 # Make sure genes are cached for global reference set 555 result["genes"] = org.genes.keys() 556 557 self.setEnabled(False) 558 try: 559 thread = threading.Thread(None, callable, 560 name="get_organism_genes", 561 ) 562 thread.start() 563 while thread.is_alive(): 564 thread.join(timeout=0.025) 565 qApp.processEvents() 566 finally: 567 self.setEnabled(True) 568 569 if "org" in result: 570 org = result["org"] 571 else: 572 raise Exception("Could not get organism genes") 573 else: 574 org = obiKEGG.KEGGOrganism(org_code) 575 576 uniqueGenes, conflicting, unknown = org.get_unique_gene_ids(set(genes), self.caseSensitive) 450 577 genesCount = len(set(genes)) 451 578 self.infoLabel.setText("%i unique gene names on input\n%i (%.1f%%) genes names matched" % (genesCount, len(uniqueGenes), 100.0*len(uniqueGenes)/genesCount if genes else 0.0)) 452 # if conflicting: 453 # print >> sys.stderr, "Conflicting genes:", conflicting 454 # if unknown: 455 # print >> sys.stderr, "Unknown genes:", unknown 579 456 580 self.information(1) 457 581 if self.useReference and self.refData: … … 464 588 reference = reduce(list.__add__, (split_and_strip(gene, ",") for gene in reference), []) 465 589 self.information(1, "Separators detected in reference gene names. Assuming multiple genes per example.") 466 uniqueRefGenes, conflicting, unknown = self.org.get_unique_gene_ids(set(reference), self.caseSensitive)590 uniqueRefGenes, conflicting, unknown = org.get_unique_gene_ids(set(reference), self.caseSensitive) 467 591 self.referenceGenes = reference = uniqueRefGenes.keys() 468 592 else: 469 self.referenceGenes = reference = self.org.get_genes()593 self.referenceGenes = reference = org.get_genes() 470 594 self.uniqueGenesDict = uniqueGenes 471 595 self.genes = uniqueGenes.keys() 472 596 self.revUniqueGenesDict = dict([(val, key) for key, val in self.uniqueGenesDict.items()]) 473 # self.progressBarInit() 474 # with orngServerFiles.DownloadProgress.setredirect(self.progressBarSet): 475 self.pathways = self.org.get_enriched_pathways(self.genes, reference, callback=lambda value: pb.advance()) #self.progressBarSet) 597 598 taxid = obiKEGG.to_taxid(org.org_code) 599 r_tax_map = dict((v,k) for k, v in obiKEGG.KEGGGenome.TAXID_MAP.items()) 600 if taxid in r_tax_map: 601 taxid = r_tax_map[taxid] 602 603 with orngServerFiles.DownloadProgress.setredirect(self.progressBarSet): 604 kegg_gs_collections = list(obiGeneSets.collections((("KEGG",), taxid))) 605 606 print self.genes 607 print taxid 608 # print kegg_gs_collections 609 if USE_THREADING: 610 result = {} 611 def callable(*args, **kwargs): 612 # result["result"] = org.get_enriched_pathways(*args, **kwargs) 613 result["result"] = pathway_enrichment(*args, **kwargs) 614 615 self.setEnabled(False) 616 try: 617 thread = threading.Thread(None, callable, 618 name="get_enriched_pathways", 619 args=(kegg_gs_collections, 620 self.genes, 621 reference), 622 kwargs={"callback": 623 threading_queued_invoke(self, 624 lambda value:pb.advance())} 625 ) 626 627 thread.start() 628 while thread.is_alive(): 629 thread.join(timeout=0.025) 630 qApp.processEvents() 631 finally: 632 self.setEnabled(True) 633 634 if "result" in result: 635 self.pathways = result["result"] 636 else: 637 raise Exception('Could not get enriched pathways') 638 else: 639 self.pathways = org.get_enriched_pathways(self.genes, reference, 640 callback=lambda value: pb.advance()) 641 642 self.org = org 476 643 if not self.pathways: 477 644 self.warning(0, "No enriched pathways found.") 478 645 else: 479 646 self.warning(0) 480 481 # self.progressBarFinished() 647 482 648 self.UpdateListView() 483 649 pb.finish() 484 ## print self.bestPValueItem 485 #self.bestPValueItem.setSelected(True) 486 #self.UpdatePathwayView() 650 487 651 488 652 def SelectObjects(self, objs): … … 513 677 514 678 if self.useAttrNames: 515 # selectedGenes = reduce(set.union, self.selectedObjects.values(), set())516 679 selectedVars = [self.data.domain[self.uniqueGenesDict[gene]] for gene in selectedGenes] 517 680 newDomain = orange.Domain(selectedVars ,0) … … 522 685 selectedExamples = [] 523 686 otherExamples = [] 524 # selectedGenes = reduce(set.union, self.selectedObjects.values(), set())525 687 for ex in self.data: 526 688 names = [self.revUniqueGenesDict.get(name, None) for name in split_and_strip(str(ex[geneAttr]), ",")] … … 568 730 sizeDlg.exec_() 569 731 732 @pyqtSignature("queuedInvoke(PyQt_PyObject)") 733 def queuedInvoke(self, func): 734 # print "queued invoke of", func 735 func() 736 737 def progressBarSet(self, value): 738 # print "Enter" 739 if not getattr(self, "_in_progress_update", False): 740 self._in_progress_update = True 741 try: 742 OWWidget.progressBarSet(self, value) 743 finally: 744 self._in_progress_update = False 745 # else: 746 # print "=====" 747 748 # print "Exit" 749 750 def onDeleteWidget(self): 751 """ Called before the widget is removed from the canvas. 752 """ 753 self.org = None 754 import gc 755 gc.collect() # Force collection 756 757 def UpdateToLatestPathways(self): 758 pass 759 760 import obiProb 761 762 def pathway_enrichment(genesets, genes, reference, prob=None, callback=None): 763 result_sets = [] 764 p_values = [] 765 if prob is None: 766 prob = obiProb.Hypergeometric() 767 768 for i, gs in enumerate(genesets): 769 cluster = gs.genes.intersection(genes) 770 ref = gs.genes.intersection(reference) 771 k = len(cluster) 772 N = len(reference) 773 m = len(ref) 774 n = len(genes) 775 if k: 776 p_val = prob.p_value(k, N, m, n) 777 result_sets.append((gs.id, cluster, ref)) 778 p_values.append(p_val) 779 if callback is not None: 780 callback(100.0 * i / len(genes)) 781 782 # FDR correction 783 p_values = obiProb.FDR(p_values) 784 785 return dict([(id, (genes, p_val, len(ref))) \ 786 for (id, genes, ref), p_val in zip(result_sets, p_values)] 787 ) 788 570 789 if __name__=="__main__": 571 790 app = QApplication(sys.argv) 572 data = orange.ExampleTable(" ../../../../orange/doc/datasets/brown-selected.tab")791 data = orange.ExampleTable("brown-selected.tab") 573 792 w = OWKEGGPathwayBrowser() 574 793 w.UpdateOrganismComboBox() 575 794 ## app.setMainWidget(w) 576 795 w.show() 577 w.SetData(data) 796 w.SetData(orange.ExampleTable(data[-20:])) 797 QTimer.singleShot(10, w.handleNewSignals) 578 798 app.exec_() 579 799 w.saveSettings()
Note: See TracChangeset
for help on using the changeset viewer.
