source: orange-bioinformatics/server_update/updateGO.py @ 1719:6e8861564778

Revision 1719:6e8861564778, 5.1 KB checked in by markotoplak, 20 months ago (diff)

Fixed update scripts for MeSH, GO, HomoloGene, NCBI_geneinfo, OMIM, PPI. Moved wget to Orange.utils.

Line 
1##!interval=7
2##!contact=ales.erjavec@fri.uni-lj.si
3
4from Orange.bio import obiGO, obiTaxonomy, obiGene, obiGenomicsUpdate
5
6import Orange.utils.environ as orngEnviron
7import Orange.utils.serverfiles as orngServerFiles
8import os, sys, shutil, urllib2, tarfile
9from getopt import getopt
10
## Server credentials are taken from the command line:
##   -u / --user      user name   (falls back to the literal "username")
##   -p / --password  password    (falls back to the literal "password")
_shortFlags = "u:p:"
_longFlags = ["user=", "password="]
opt = dict(getopt(sys.argv[1:], _shortFlags, _longFlags)[0])

## The short flag takes precedence over the long one when both are given.
if "-u" in opt:
    username = opt["-u"]
else:
    username = opt.get("--user", "username")

if "-p" in opt:
    password = opt["-p"]
else:
    password = opt.get("--password", "password")
15
16from collections import defaultdict
17
## Scratch directory handed to obiGO.Update as its local database path; the
## archives uploaded below are read back from here.
tmpDir = os.path.join(orngEnviron.buffer_dir, "tmp_GO")

## Create the directory only when it is missing.  An already existing
## directory is the expected case on repeated runs; any other failure
## (permissions, read-only disk, ...) is now reported here instead of being
## silently swallowed and surfacing later as a confusing download error.
if not os.path.isdir(tmpDir):
    os.makedirs(tmpDir)

## Authenticated client used for all uploads in this script.
serverFiles = orngServerFiles.ServerFiles(username, password)

## Updater that downloads GO files into tmpDir.
u = obiGO.Update(local_database_path = tmpDir)
27
def uncompressedSize(filename):
    """Return the summed size, in bytes, of all members of a tar archive.

    filename -- path of a tar archive (any compression tarfile.open can
                detect transparently).

    The archive is always closed before returning, so repeated calls do
    not leak open file handles.
    """
    archive = tarfile.open(filename)
    try:
        return sum(info.size for info in archive.getmembers())
    finally:
        archive.close()
29
30def pp(*args, **kw): print args, kw
31
32if u.IsUpdatable(obiGO.Update.UpdateOntology, ()):
33    u.UpdateOntology()
34    filename = os.path.join(tmpDir, "gene_ontology_edit.obo.tar.gz")
35    ##load the ontology to test it
36    o = obiGO.Ontology(filename)
37    del o
38    ##upload the ontology
39    print "Uploading gene_ontology_edit.obo.tar.gz"
40    serverFiles.upload("GO", "gene_ontology_edit.obo.tar.gz", filename, title = "Gene Ontology (GO)",
41                       tags=["gene", "ontology", "GO", "essential", "#uncompressed:%i" % uncompressedSize(filename), "#version:%i" % obiGO.Ontology.version])
42    serverFiles.unprotect("GO", "gene_ontology_edit.obo.tar.gz")
43
44#from obiGeneMatch import _dbOrgMap
45#
46#exclude = ["goa_uniprot", "goa_pdb", "GeneDB_tsetse", "reactome", "goa_zebrafish", "goa_rat", "goa_mouse"]
47#lines = [line.split("\t") for line in urllib2.urlopen("ftp://ftp.genome.jp/pub/kegg/genes/taxonomy").readlines() if not line.startswith("#")]
48#keggOrgNames = dict([(line[1].strip(), line[-1][:-5].strip().replace("(", "").replace(")", "") if line[-1].endswith("(EST)\n") else line[-1].strip()) for line in lines if len(line)>1])
49
50#additionalNames = {"goa_arabidopsis":"Arabidopsis thaliana", "sgn":"Solanaceae", "PAMGO_Oomycetes":"Oomycete"}
51#essentialOrgs = ["goa_human", "sgd", "mgi", "dictyBase"]
52
## NOTE(review): within this file orgMap is referenced only by the
## commented-out commonOrgs expression below; it is kept in case other code
## relies on it.
orgMap = {"352472":"44689", "562":"83333", "3055":None, "7955":None, "11103":None, "2104":None, "4754":None, "31033":None, "8355":None, "4577":None}

#commonOrgs = dict([(obiGO.from_taxid(orgMap.get(id, id)).pop(), orgMap.get(id, id)) for id in obiTaxonomy.common_taxids() if orgMap.get(id, id) != None])

## Map each GO organism code to its taxonomy id.  Taxonomy ids for which
## obiGO.from_taxid returns None are skipped.  The lookup is done once per
## id, and the loop variable no longer shadows the builtin `id`.
commonOrgs = {}
for taxid in obiTaxonomy.common_taxids():
    orgCode = obiGO.from_taxid(taxid)
    if orgCode is not None:
        commonOrgs[orgCode] = taxid

## GO organism codes whose annotation files get the "essential" tag.
essentialOrgs = [obiGO.from_taxid(taxid) for taxid in obiTaxonomy.essential_taxids()]

## Annotation sets that are never updated by this script.
exclude = ["goa_uniprot", "goa_pdb", "GeneDB_tsetse", "reactome", "goa_zebrafish", "goa_rat", "goa_mouse"]

## taxon id -> set of GO organism codes whose annotations mention that taxon;
## filled in while the annotations are updated.
updatedTaxonomy = defaultdict(set)
63
64for org in u.GetAvailableOrganisms():
65    if org in exclude or org not in commonOrgs:
66        continue
67   
68    if u.IsUpdatable(obiGO.Update.UpdateAnnotation, (org,)):
69        u.UpdateAnnotation(org)
70        filename = os.path.join(tmpDir, "gene_association." + org + ".tar.gz")
71       
72        ## Load the annotations to test them and collect all taxon ids from them
73        a = obiGO.Annotations(filename, genematcher=obiGene.GMDirect())
74        taxons = set([ann.taxon for ann in a.annotations])
75        for taxId in [t.split(":")[-1] for t in taxons if "|" not in t]: ## exclude taxons with cardinality 2
76            updatedTaxonomy[taxId].add(org)
77        del a
78        ## Upload the annotation
79#        if org in _dbOrgMap:
80#            orgName = keggOrgNames[_dbOrgMap[org]].split("(")[0].strip()
81#        elif org in additionalNames:
82#            orgName = additionalNames[org]
83#        else:
84#            orgName = org
85        orgName = obiTaxonomy.name(commonOrgs[org])
86#            print "unknown organism name translation for:", org
87        print "Uploading", "gene_association." + org + ".tar.gz"
88        serverFiles.upload("GO", "gene_association." + org + ".tar.gz", filename, title = "GO Annotations for " + orgName,
89                           tags=["gene", "annotation", "ontology", "GO", orgName, "#uncompressed:%i" % uncompressedSize(filename),
90                                 "#organism:"+orgName, "#version:%i" % obiGO.Annotations.version] + (["essential"] if org in essentialOrgs else []))
91        serverFiles.unprotect("GO", "gene_association." + org + ".tar.gz")
92       
93try:
94    import cPickle
95#    tax = cPickle.load(open(os.path.join(tmpDir, "taxonomy.pickle"), "rb"))
96    tax = cPickle.load(open(orngServerFiles.localpath_download("GO", "taxonomy.pickle"), "rb"))
97except Exception:
98    tax = {}
99
100## Upload taxonomy if any differences in the updated taxonomy
101if any(tax.get(key, set()) != updatedTaxonomy.get(key, set()) for key in set(updatedTaxonomy)):
102    tax.update(updatedTaxonomy)
103    cPickle.dump(tax, open(os.path.join(tmpDir, "taxonomy.pickle"), "wb"))
104    print "Uploading", "taxonomy.pickle"
105    serverFiles.upload("GO", "taxonomy.pickle", os.path.join(tmpDir, "taxonomy.pickle"), title="GO taxon IDs",
106                       tags = ["GO", "taxon", "organism", "essential", "#version:%i" % obiGO.Taxonomy.version])
107    serverFiles.unprotect("GO", "taxonomy.pickle")
Note: See TracBrowser for help on using the repository browser.