source: orange-bioinformatics/server_update/updateGO.py @ 1721:921b3780c6cc

Revision 1721:921b3780c6cc, 4.7 KB checked in by markotoplak, 20 months ago (diff)

Moved common functionality of the server files update scripts to a separate file.

Line 
1##!interval=7
2##!contact=ales.erjavec@fri.uni-lj.si
3
4from common import *
5
6from Orange.bio import obiGO, obiTaxonomy, obiGene, obiGenomicsUpdate
7
8import urllib2, tarfile
9
10from collections import defaultdict
11
# Working directory handed to obiGO.Update as its local database path; the
# archive paths used further down are all built relative to it.
tmpDir = os.path.join(environ.buffer_dir, "tmp_GO")
try:
    os.mkdir(tmpDir)
except OSError:
    # A directory left over from a previous run is fine.  Any other failure
    # (missing parent, permissions, a plain file in the way) would break every
    # later step, so surface it here instead of silently ignoring it.
    if not os.path.isdir(tmpDir):
        raise

u = obiGO.Update(local_database_path=tmpDir)
19
def uncompressedSize(filename):
    """Return the summed size, in bytes, of all members of a tar archive.

    filename -- path of the archive; opened with ``tarfile.open`` in its
                default (auto-detecting) read mode.

    The archive is closed before returning, also when reading the member
    list raises.
    """
    archive = tarfile.open(filename)
    try:
        return sum(info.size for info in archive.getmembers())
    finally:
        archive.close()
21
def pp(*args, **kw):
    """Debug helper: print the positional-argument tuple and keyword dict.

    Writes ``str(args)``, a single space, ``str(kw)`` and a newline to
    standard output.  The single-argument ``print(...)`` form is used so the
    line parses the same with or without the print statement.
    """
    print("%s %s" % (args, kw))
23
# Ontology step: runs only when u.IsUpdatable reports that UpdateOntology has
# something to do.
if u.IsUpdatable(obiGO.Update.UpdateOntology, ()):
    u.UpdateOntology()
    ontologyArchive = "gene_ontology_edit.obo.tar.gz"
    filename = os.path.join(tmpDir, ontologyArchive)

    # Load the ontology once purely as a sanity check; the instance is
    # discarded right away.
    obiGO.Ontology(filename)

    # Publish the archive and lift its protection.
    print("Uploading gene_ontology_edit.obo.tar.gz")
    ontologyTags = ["gene", "ontology", "GO", "essential"]
    ontologyTags.append("#uncompressed:%i" % uncompressedSize(filename))
    ontologyTags.append("#version:%i" % obiGO.Ontology.version)
    sf_server.upload("GO", ontologyArchive, filename,
                     title="Gene Ontology (GO)", tags=ontologyTags)
    sf_server.unprotect("GO", ontologyArchive)
35
36#from obiGeneMatch import _dbOrgMap
37#
38#exclude = ["goa_uniprot", "goa_pdb", "GeneDB_tsetse", "reactome", "goa_zebrafish", "goa_rat", "goa_mouse"]
39#lines = [line.split("\t") for line in urllib2.urlopen("ftp://ftp.genome.jp/pub/kegg/genes/taxonomy").readlines() if not line.startswith("#")]
40#keggOrgNames = dict([(line[1].strip(), line[-1][:-5].strip().replace("(", "").replace(")", "") if line[-1].endswith("(EST)\n") else line[-1].strip()) for line in lines if len(line)>1])
41
42#additionalNames = {"goa_arabidopsis":"Arabidopsis thaliana", "sgn":"Solanaceae", "PAMGO_Oomycetes":"Oomycete"}
43#essentialOrgs = ["goa_human", "sgd", "mgi", "dictyBase"]
44
# Taxonomy id remapping table.  Within this script it is only referenced by
# the disabled commonOrgs variant kept in the comment below.
orgMap = {"352472":"44689", "562":"83333", "3055":None, "7955":None, "11103":None, "2104":None, "4754":None, "31033":None, "8355":None, "4577":None}

#commonOrgs = dict([(obiGO.from_taxid(orgMap.get(id, id)).pop(), orgMap.get(id, id)) for id in obiTaxonomy.common_taxids() if orgMap.get(id, id) != None])

# Maps the value returned by obiGO.from_taxid to the taxonomy id it was
# looked up from, for every common taxonomy id that has such a value.
# from_taxid is called once per id, and the loop variable is not named ``id``
# so the builtin is not rebound at module level.
commonOrgs = {}
for taxid in obiTaxonomy.common_taxids():
    orgCode = obiGO.from_taxid(taxid)
    if orgCode is not None:
        commonOrgs[orgCode] = taxid

# from_taxid results for the essential taxonomy ids; uploads for these get
# the extra "essential" tag.
essentialOrgs = [obiGO.from_taxid(taxid) for taxid in obiTaxonomy.essential_taxids()]

# Organism codes that are never processed, even if they are in commonOrgs.
exclude = ["goa_uniprot", "goa_pdb", "GeneDB_tsetse", "reactome", "goa_zebrafish", "goa_rat", "goa_mouse"]

# taxon id -> set of organism codes whose refreshed annotations mention it.
updatedTaxonomy = defaultdict(set)
55
# Annotation step: refresh, sanity-check and publish the annotation archive of
# each available organism that is in commonOrgs and not in exclude.
for org in u.GetAvailableOrganisms():
    wanted = org in commonOrgs and org not in exclude
    if not wanted or not u.IsUpdatable(obiGO.Update.UpdateAnnotation, (org,)):
        continue

    u.UpdateAnnotation(org)
    archiveName = "gene_association." + org + ".tar.gz"
    filename = os.path.join(tmpDir, archiveName)

    ## Load the annotations to test them and collect all taxon ids from them
    a = obiGO.Annotations(filename, genematcher=obiGene.GMDirect())
    for taxon in set(ann.taxon for ann in a.annotations):
        if "|" in taxon:
            ## exclude taxons with cardinality 2
            continue
        # keep only the part after the last ":" as the taxon id
        updatedTaxonomy[taxon.split(":")[-1]].add(org)
    del a

    ## Upload the annotation
    orgName = obiTaxonomy.name(commonOrgs[org])
    print("Uploading " + archiveName)
    annotationTags = [
        "gene", "annotation", "ontology", "GO", orgName,
        "#uncompressed:%i" % uncompressedSize(filename),
        "#organism:" + orgName,
        "#version:%i" % obiGO.Annotations.version,
    ]
    if org in essentialOrgs:
        annotationTags.append("essential")
    sf_server.upload("GO", archiveName, filename,
                     title="GO Annotations for " + orgName, tags=annotationTags)
    sf_server.unprotect("GO", archiveName)
84       
# Imported outside the try block so that a failing import is not swallowed
# here only to reappear as a NameError when the taxonomy is dumped.
import cPickle

# Load the previously published taxon-id table.  Any failure to fetch or read
# it falls back to an empty table.
# NOTE(review): this unpickles a file obtained through sf_local; that is only
# safe as long as the server content is trusted.
try:
#    tax = cPickle.load(open(os.path.join(tmpDir, "taxonomy.pickle"), "rb"))
    taxonomyFile = open(sf_local.localpath_download("GO", "taxonomy.pickle"), "rb")
    try:
        tax = cPickle.load(taxonomyFile)
    finally:
        # always release the handle, also when the pickle is unreadable
        taxonomyFile.close()
except Exception:
    tax = {}
91
## Upload taxonomy if any differences in the updated taxonomy
taxonomyChanged = any(tax.get(key, set()) != orgs
                      for key, orgs in updatedTaxonomy.items())
if taxonomyChanged:
    tax.update(updatedTaxonomy)
    taxonomyPath = os.path.join(tmpDir, "taxonomy.pickle")
    # Close the file explicitly so the pickle is completely written before it
    # is uploaded, rather than relying on the file object being finalised as
    # soon as dump() returns.
    taxonomyFile = open(taxonomyPath, "wb")
    try:
        cPickle.dump(tax, taxonomyFile)
    finally:
        taxonomyFile.close()
    print("Uploading taxonomy.pickle")
    sf_server.upload("GO", "taxonomy.pickle", taxonomyPath, title="GO taxon IDs",
                     tags=["GO", "taxon", "organism", "essential", "#version:%i" % obiGO.Taxonomy.version])
    sf_server.unprotect("GO", "taxonomy.pickle")
Note: See TracBrowser for help on using the repository browser.