source: orange-bioinformatics/server_update/updateGO.py @ 1832:2c6d506ce3f2

Revision 1832:2c6d506ce3f2, 4.7 KB checked in by Flashpoint <vid.flashpoint@…>, 10 months ago (diff)

Added the short name tags in updateGO.py

Line 
1##!interval=7
2##!contact=ales.erjavec@fri.uni-lj.si
3
4from common import *
5
6from Orange.bio import obiGO, obiTaxonomy, obiGene, obiGenomicsUpdate
7
8import urllib2, tarfile
9
10from collections import defaultdict
11
# Scratch directory inside the shared buffer dir. It is handed to the GO
# updater as its local database path and is where the archives uploaded
# further down are read from.
tmpDir = os.path.join(environ.buffer_dir, "tmp_GO")
try:
    os.mkdir(tmpDir)
except OSError:
    # The directory already existing is the normal case on every run after
    # the first one. Any other failure (permissions, missing parent, a plain
    # file in the way) is re-raised so it is reported here instead of
    # surfacing later as an obscure download error.
    if not os.path.isdir(tmpDir):
        raise

u = obiGO.Update(local_database_path=tmpDir)
19
def uncompressedSize(filename):
    """Return the summed size in bytes of all members of a tar archive.

    filename -- path of a (possibly compressed) tar archive readable by
                tarfile.open.

    The archive is closed explicitly before returning; the value is used
    for the "#uncompressed:<bytes>" tag attached to uploaded files.
    """
    archive = tarfile.open(filename)
    try:
        return sum(info.size for info in archive.getmembers())
    finally:
        # try/finally rather than "with": TarFile only became a context
        # manager in Python 2.7.
        archive.close()
21
22def pp(*args, **kw): print args, kw
23
24if u.IsUpdatable(obiGO.Update.UpdateOntology, ()):
25    u.UpdateOntology()
26    filename = os.path.join(tmpDir, "gene_ontology_edit.obo.tar.gz")
27    ##load the ontology to test it
28    o = obiGO.Ontology(filename)
29    del o
30    ##upload the ontology
31    print "Uploading gene_ontology_edit.obo.tar.gz"
32    sf_server.upload("GO", "gene_ontology_edit.obo.tar.gz", filename, title = "Gene Ontology (GO)",
33                       tags=["gene", "ontology", "GO", "essential", "#uncompressed:%i" % uncompressedSize(filename), "#version:%i" % obiGO.Ontology.version])
34    sf_server.unprotect("GO", "gene_ontology_edit.obo.tar.gz")
35
36#from obiGeneMatch import _dbOrgMap
37#
38#exclude = ["goa_uniprot", "goa_pdb", "GeneDB_tsetse", "reactome", "goa_zebrafish", "goa_rat", "goa_mouse"]
39#lines = [line.split("\t") for line in urllib2.urlopen("ftp://ftp.genome.jp/pub/kegg/genes/taxonomy").readlines() if not line.startswith("#")]
40#keggOrgNames = dict([(line[1].strip(), line[-1][:-5].strip().replace("(", "").replace(")", "") if line[-1].endswith("(EST)\n") else line[-1].strip()) for line in lines if len(line)>1])
41
42#additionalNames = {"goa_arabidopsis":"Arabidopsis thaliana", "sgn":"Solanaceae", "PAMGO_Oomycetes":"Oomycete"}
43#essentialOrgs = ["goa_human", "sgd", "mgi", "dictyBase"]
44
# Manual taxonomy-id overrides (taxid -> replacement taxid, or None).
# NOTE(review): no active statement in this script reads orgMap; it is only
# referenced by the commented-out legacy expression below. It is kept
# because it is a module-level name.
orgMap = {"352472":"44689", "562":"83333", "3055":None, "7955":None, "11103":None, "2104":None, "4754":None, "31033":None, "8355":None, "4577":None}

#commonOrgs = dict([(obiGO.from_taxid(orgMap.get(id, id)).pop(), orgMap.get(id, id)) for id in obiTaxonomy.common_taxids() if orgMap.get(id, id) != None])

# Maps the value returned by obiGO.from_taxid (used below as the organism key
# of the annotation files) to its taxonomy id, for every common taxid that
# from_taxid can translate. from_taxid is called once per id, and the loop
# variable no longer shadows the builtin `id`.
commonOrgs = {}
for taxId in obiTaxonomy.common_taxids():
    orgCode = obiGO.from_taxid(taxId)
    if orgCode is not None:
        commonOrgs[orgCode] = taxId

# Organism keys whose annotation uploads get the extra "essential" tag.
essentialOrgs = [obiGO.from_taxid(taxId) for taxId in obiTaxonomy.essential_taxids()]

# Annotation sets that are never processed, even if otherwise eligible.
exclude = ["goa_uniprot", "goa_pdb", "GeneDB_tsetse", "reactome", "goa_zebrafish", "goa_rat", "goa_mouse"]

# taxon id -> set of organism keys whose annotations mention that taxon;
# filled in while the annotations are refreshed.
updatedTaxonomy = defaultdict(set)
55
56for org in u.GetAvailableOrganisms():
57    if org in exclude or org not in commonOrgs:
58        continue
59
60    if u.IsUpdatable(obiGO.Update.UpdateAnnotation, (org,)):
61        print "JA"
62        u.UpdateAnnotation(org)
63        filename = os.path.join(tmpDir, "gene_association." + org + ".tar.gz")
64       
65        ## Load the annotations to test them and collect all taxon ids from them
66        a = obiGO.Annotations(filename, genematcher=obiGene.GMDirect())
67        taxons = set([ann.taxon for ann in a.annotations])
68        for taxId in [t.split(":")[-1] for t in taxons if "|" not in t]: ## exclude taxons with cardinality 2
69            updatedTaxonomy[taxId].add(org)
70        del a
71        ## Upload the annotation
72#        if org in _dbOrgMap:
73#            orgName = keggOrgNames[_dbOrgMap[org]].split("(")[0].strip()
74#        elif org in additionalNames:
75#            orgName = additionalNames[org]
76#        else:
77#            orgName = org
78        orgName = obiTaxonomy.name(commonOrgs[org])
79#            print "unknown organism name translation for:", org
80        print "Uploading", "gene_association." + org + ".tar.gz"
81        sf_server.upload("GO", "gene_association." + org + ".tar.gz", filename, title = "GO Annotations for " + orgName,
82                           tags=["gene", "annotation", "ontology", "GO", orgName, "#uncompressed:%i" % uncompressedSize(filename),
83                                 "#organism:"+orgName, "#version:%i" % obiGO.Annotations.version] + (["essential"] if org in essentialOrgs else [])) #+ obiTaxonomy.shortname(org))
84        sf_server.unprotect("GO", "gene_association." + org + ".tar.gz")
85       
# Imported outside the try block so that the name is always bound for the
# dump further down, and so that a broken import is not silently mistaken
# for "no taxonomy published yet".
import cPickle

# Load the currently published taxonomy mapping, falling back to an empty
# one when it cannot be fetched or read.
try:
    # NOTE(security): unpickling can execute arbitrary code; this is only
    # acceptable while the file server content is fully trusted.
    taxFile = open(sf_local.localpath_download("GO", "taxonomy.pickle"), "rb")
    try:
        tax = cPickle.load(taxFile)
    finally:
        taxFile.close()
except Exception:
    # Deliberate best effort: any download or decoding problem means we
    # start from scratch and re-publish the mapping below.
    tax = {}
92
93## Upload taxonomy if any differences in the updated taxonomy
94if any(tax.get(key, set()) != updatedTaxonomy.get(key, set()) for key in set(updatedTaxonomy)):
95    tax.update(updatedTaxonomy)
96    cPickle.dump(tax, open(os.path.join(tmpDir, "taxonomy.pickle"), "wb"))
97    print "Uploading", "taxonomy.pickle"
98    sf_server.upload("GO", "taxonomy.pickle", os.path.join(tmpDir, "taxonomy.pickle"), title="GO taxon IDs",
99                       tags = ["GO", "taxon", "organism", "essential", "#version:%i" % obiGO.Taxonomy.version])
100    sf_server.unprotect("GO", "taxonomy.pickle")
Note: See TracBrowser for help on using the repository browser.