source: orange-bioinformatics/server_update/updateGO.py @ 1717:b9f395780e75

Revision 1717:b9f395780e75, 5.0 KB checked in by markotoplak, 20 months ago (diff)

Copied server update scripts from Orange source.

Line 
1##!interval=7
2##!contact=ales.erjavec@fri.uni-lj.si
3
4import obiGO, obiTaxonomy, obiGene, obiGenomicsUpdate, orngEnviron, orngServerFiles
5import os, sys, shutil, urllib2, tarfile
6from getopt import getopt
7
# Command-line options: -u/--user and -p/--password carry the server
# credentials.  Only the (option, value) pairs are kept; positional
# arguments returned by getopt are ignored.
optionPairs = getopt(sys.argv[1:], "u:p:", ["user=", "password="])[0]
opt = dict(optionPairs)

# The short form wins over the long form; the literal placeholders are used
# when neither form was given.
username = opt["-u"] if "-u" in opt else opt.get("--user", "username")
password = opt["-p"] if "-p" in opt else opt.get("--password", "password")
12
13from collections import defaultdict
14
# Scratch directory (inside the Orange buffer directory) that receives the
# downloaded GO archives before they are uploaded.
tmpDir = os.path.join(orngEnviron.bufferDir, "tmp_GO")
try:
    os.mkdir(tmpDir)
except OSError:
    # A directory left over from a previous run is expected and fine; any
    # other failure (permissions, missing parent, a plain file in the way)
    # would only surface later as a confusing download error, so report it.
    if not os.path.isdir(tmpDir):
        raise

# Authenticated connection used for every upload/unprotect call below.
serverFiles = orngServerFiles.ServerFiles(username, password)

# Updater that downloads ontology/annotation archives into tmpDir.
u = obiGO.Update(local_database_path=tmpDir)
24
def uncompressedSize(filename):
    """Return the summed size, in bytes, of all members of a tar archive.

    filename -- path of the archive; it is opened with tarfile.open's
                default mode (compression is detected transparently).

    The archive handle is always closed before returning, also when reading
    the member list fails, so repeated calls do not leak open files.
    Errors raised by tarfile.open / getmembers propagate to the caller.
    """
    archive = tarfile.open(filename)
    try:
        return sum(info.size for info in archive.getmembers())
    finally:
        archive.close()
26
def pp(*args, **kw):
    """Debug helper: print the positional-argument tuple and the keyword dict.

    The two values are written on one line, separated by a single space.
    """
    print("%s %s" % (args, kw))
28
# Refresh the ontology archive and publish it when the updater reports that
# a newer version is available.
if u.IsUpdatable(obiGO.Update.UpdateOntology, ()):
    u.UpdateOntology()
    filename = os.path.join(tmpDir, "gene_ontology_edit.obo.tar.gz")

    # Load the downloaded archive once as a sanity check; the parsed
    # ontology itself is not needed, so it is discarded immediately.
    obiGO.Ontology(filename)

    print("Uploading gene_ontology_edit.obo.tar.gz")
    ontologyTags = [
        "gene",
        "ontology",
        "GO",
        "essential",
        "#uncompressed:%i" % uncompressedSize(filename),
        "#version:%i" % obiGO.Ontology.version,
    ]
    serverFiles.upload("GO", "gene_ontology_edit.obo.tar.gz", filename,
                       title="Gene Ontology (GO)", tags=ontologyTags)
    serverFiles.unprotect("GO", "gene_ontology_edit.obo.tar.gz")
40
41#from obiGeneMatch import _dbOrgMap
42#
43#exclude = ["goa_uniprot", "goa_pdb", "GeneDB_tsetse", "reactome", "goa_zebrafish", "goa_rat", "goa_mouse"]
44#lines = [line.split("\t") for line in urllib2.urlopen("ftp://ftp.genome.jp/pub/kegg/genes/taxonomy").readlines() if not line.startswith("#")]
45#keggOrgNames = dict([(line[1].strip(), line[-1][:-5].strip().replace("(", "").replace(")", "") if line[-1].endswith("(EST)\n") else line[-1].strip()) for line in lines if len(line)>1])
46
47#additionalNames = {"goa_arabidopsis":"Arabidopsis thaliana", "sgn":"Solanaceae", "PAMGO_Oomycetes":"Oomycete"}
48#essentialOrgs = ["goa_human", "sgd", "mgi", "dictyBase"]
49
# Taxonomy id remapping table.  No live statement in the visible script reads
# it any more; it is kept because other code may still import it.
orgMap = {"352472":"44689", "562":"83333", "3055":None, "7955":None, "11103":None, "2104":None, "4754":None, "31033":None, "8355":None, "4577":None}

# GO annotation code -> taxonomy id, for every common taxonomy id that has a
# GO annotation code.  from_taxid is looked up once per id, and the loop
# variable no longer shadows the builtin `id`.
commonOrgs = {}
for taxid in obiTaxonomy.common_taxids():
    code = obiGO.from_taxid(taxid)
    if code is not None:
        commonOrgs[code] = taxid

# GO annotation codes of the essential organisms; their annotation files get
# the additional "essential" tag on upload.
essentialOrgs = [obiGO.from_taxid(taxid) for taxid in obiTaxonomy.essential_taxids()]
56
# Annotation sets that are never processed, even when available upstream.
exclude = ["goa_uniprot", "goa_pdb", "GeneDB_tsetse", "reactome", "goa_zebrafish", "goa_rat", "goa_mouse"]

# taxon id -> set of annotation codes whose files mention that taxon;
# filled in only for the annotation sets refreshed during this run.
updatedTaxonomy = defaultdict(set)
import obiTaxonomy

for org in u.GetAvailableOrganisms():
    # Only annotation sets of common organisms that are not blacklisted.
    if org in exclude or org not in commonOrgs:
        continue
    # Nothing to do when the updater reports no newer annotation file.
    if not u.IsUpdatable(obiGO.Update.UpdateAnnotation, (org,)):
        continue

    u.UpdateAnnotation(org)
    archiveName = "gene_association." + org + ".tar.gz"
    filename = os.path.join(tmpDir, archiveName)

    # Loading the annotations doubles as a validity check of the download
    # and provides the taxon ids that occur in the file.
    a = obiGO.Annotations(filename, genematcher=obiGene.GMDirect())
    for taxon in set(ann.taxon for ann in a.annotations):
        # Entries containing "|" list two taxons (cardinality 2): skipped.
        if "|" in taxon:
            continue
        # Keep only the part after the last ":" as the taxon id.
        updatedTaxonomy[taxon.split(":")[-1]].add(org)
    del a

    # Name used in the upload title and tags, looked up from the taxonomy id.
    orgName = obiTaxonomy.name(commonOrgs[org])

    print("Uploading " + archiveName)
    annotationTags = [
        "gene",
        "annotation",
        "ontology",
        "GO",
        orgName,
        "#uncompressed:%i" % uncompressedSize(filename),
        "#organism:" + orgName,
        "#version:%i" % obiGO.Annotations.version,
    ]
    if org in essentialOrgs:
        annotationTags.append("essential")
    serverFiles.upload("GO", archiveName, filename,
                       title="GO Annotations for " + orgName, tags=annotationTags)
    serverFiles.unprotect("GO", archiveName)
90       
# Imported outside the try block below so that an import problem is not
# silently mistaken for "no taxonomy published yet".
import cPickle

try:
    # NOTE(review): unpickling runs whatever the pickle encodes; this is only
    # acceptable as long as the file server is a trusted source.
    with open(orngServerFiles.localpath_download("GO", "taxonomy.pickle"), "rb") as taxonomyIn:
        tax = cPickle.load(taxonomyIn)
except Exception:
    # Best effort: if the current taxonomy cannot be downloaded or read,
    # start from an empty mapping and rebuild it from this run's data.
    tax = {}

## Upload taxonomy if any differences in the updated taxonomy
if any(tax.get(key, set()) != updatedTaxonomy.get(key, set()) for key in set(updatedTaxonomy)):
    # NOTE(review): update() replaces the stored set of every touched taxon id
    # with the set collected in this run instead of merging the two --
    # confirm that dropping annotation sets not refreshed this run is intended.
    tax.update(updatedTaxonomy)
    taxonomyPath = os.path.join(tmpDir, "taxonomy.pickle")
    # Close (and thereby flush) the pickle before the upload reads it back.
    with open(taxonomyPath, "wb") as taxonomyOut:
        cPickle.dump(tax, taxonomyOut)
    print("Uploading taxonomy.pickle")
    serverFiles.upload("GO", "taxonomy.pickle", taxonomyPath, title="GO taxon IDs",
                       tags=["GO", "taxon", "organism", "essential", "#version:%i" % obiGO.Taxonomy.version])
    serverFiles.unprotect("GO", "taxonomy.pickle")
Note: See TracBrowser for help on using the repository browser.