source: orange/install-scripts/orngServer/serverUpdateScripts/updateKEGG.py @ 8042:ffcb93bc9028

Revision 8042:ffcb93bc9028, 7.4 KB checked in by markotoplak, 3 years ago (diff)

Hierarchical clustering: also catch RuntimeError when importing matplotlib (or the documentation could not be built on server).

Line 
1##!interval=7
2##!contact=ales.erjavec@fri.uni-lj.si
3
4import obiKEGG, obiGene, obiTaxonomy
5import orngServerFiles, orngEnviron
6import os, sys, tarfile, urllib2, shutil
7from getopt import getopt
8
9import obiData
# To limit the number of connections (original author's note on borg_class).
obiKEGG.borg_class(obiData.FtpDownloader)


# Server credentials come from -u/--user and -p/--password; the literal
# placeholders "username"/"password" are used when an option is missing.
opt = dict(getopt(sys.argv[1:], "u:p:", ["user=", "password="])[0])

username = opt.get("-u", opt.get("--user", "username"))
password = opt.get("-p", opt.get("--password", "password"))

# Scratch directory in which everything is downloaded and archived.
tmp_path = os.path.join(orngEnviron.bufferDir, "tmp_KEGG/")

serverFiles = orngServerFiles.ServerFiles(username, password)

# For a dry run, replace serverFiles with a stub that only prints its calls:
#
#   def output(self, *args, **kwargs):
#       print args, kwargs
#   serverFiles = type("bla", (object,), dict(upload=output, unprotect=output))()

# Start from an empty scratch directory.  A missing directory is the normal
# first-run case and is ignored; anything else is reported, because leftover
# files would be packed into the archives built later in this script.
try:
    shutil.rmtree(tmp_path)
except OSError:
    if os.path.isdir(tmp_path):
        sys.stderr.write("Warning: could not remove %s; stale files may be "
                         "included in the uploaded archives\n" % tmp_path)

# Only create the directory when it is really absent; a genuine mkdir failure
# now surfaces here instead of as a confusing chdir error below.
if not os.path.isdir(tmp_path):
    os.mkdir(tmp_path)

# Remember where we started so the working directory can be restored at the
# end of the script; all archive paths below are relative to tmp_path.
realPath = os.path.realpath(os.curdir)
os.chdir(tmp_path)

obiKEGG.DEFAULT_DATABASE_PATH = tmp_path
43
def uncompressedSize(filename):
    """Return the summed size in bytes of all members of the tar archive
    `filename`.

    The archive is opened for reading and always closed again, even when
    listing its members fails.
    """
    archive = tarfile.open(filename)
    try:
        return sum(info.size for info in archive.getmembers())
    finally:
        archive.close()


def tar(filename, mode="w:gz", add=()):
    """Create the tar archive `filename` and return its uncompressed size.

    filename -- path of the archive to write
    mode     -- tarfile open mode (gzip-compressed write by default)
    add      -- iterable of paths to add; directories are added recursively
                by tarfile.  Defaults to an empty archive.

    The archive handle is closed even if adding a path raises, so a failed
    run does not leak an open file.
    """
    archive = tarfile.open(filename, mode)
    try:
        for path in add:
            archive.add(path)
    finally:
        archive.close()
    # Re-open the finished archive to measure the payload it contains.
    return uncompressedSize(filename)
52
### KEGG Genome
################

print("KEGGGenome.download()")
obiKEGG.KEGGGenome.download()

genome = obiKEGG.KEGGGenome()

# Organism lists consulted by the per-organism section further down.
essential_organisms = genome.essential_organisms()
common_organisms = genome.common_organisms()

files = ["genes/genome"]
genome_archive = "kegg_genome.tar.gz"

print("Uploading " + genome_archive)

size = tar(genome_archive, add=files)

# Tags starting with "#" carry metadata: uncompressed size, compression
# format, data version and the "!@"-joined list of archived paths.
genome_tags = ["kegg", "genome", "taxonomy", "essential"]
genome_tags.append("#uncompressed:%i" % size)
genome_tags.append("#compression:tar.gz")
genome_tags.append("#version:%s" % obiKEGG.KEGGGenome.VERSION)
genome_tags.append("#files:%s" % "!@".join(files))

serverFiles.upload("KEGG", genome_archive, genome_archive,
                   title="KEGG Genome", tags=genome_tags)
serverFiles.unprotect("KEGG", genome_archive)
70
### KEGG Ligand (enzymes, compounds, reactions)
################################################

# Each database is downloaded and then instantiated once.  The instances are
# not used afterwards; the constructor calls are kept as in the original
# script (NOTE(review): presumably they validate or index the downloaded
# files -- confirm against obiKEGG before removing them).
print("KEGGEnzymes.download()")
obiKEGG.KEGGEnzymes.download()
enzymes = obiKEGG.KEGGEnzymes()

print("KEGGCompounds.download()")
obiKEGG.KEGGCompounds.download()
compounds = obiKEGG.KEGGCompounds()

print("KEGGReactions.download()")
obiKEGG.KEGGReactions.download()
reactions = obiKEGG.KEGGReactions()

files = ["ligand/enzyme/", "ligand/reaction/", "ligand/compound/"]
size = tar("kegg_ligand.tar.gz", add=files)

print("Uploading kegg_ligand.tar.gz")
# The "compounds" tag was previously misspelled "compunds", which made the
# archive impossible to find by searching for the correctly spelled tag.
serverFiles.upload("KEGG", "kegg_ligand.tar.gz", "kegg_ligand.tar.gz", title="KEGG Ligand",
                   tags=["kegg", "enzymes", "compounds", "reactions", "essential", "#uncompressed:%i" % size,
                         "#compression:tar.gz", "#version:v1.0", "#files:%s" % "!@".join(files)])
serverFiles.unprotect("KEGG", "kegg_ligand.tar.gz")
91
92
### KEGG Reference Pathways
############################

# One entry per reference pathway set: (code, paths packed into its archive).
# The sets are processed in this order: map, ec, ko.
reference_pathway_sets = [
    ("map", ["pathway/map/"]),
    ("ec", ["pathway/ec/", "xml/kgml/metabolic/ec/"]),
    ("ko", ["pathway/ko/", "xml/kgml/metabolic/ko/", "xml/kgml/non-metabolic/ko/"]),
]

for ref_code, files in reference_pathway_sets:
    print('KEGGPathway.download_pathways("%s")' % ref_code)
    obiKEGG.KEGGPathway.download_pathways(ref_code)

    ref_archive = "kegg_pathways_%s.tar.gz" % ref_code
    size = tar(ref_archive, add=files)

    print("Uploading " + ref_archive)
    # "#"-prefixed tags carry the archive metadata (size, compression,
    # version, archived paths); the rest are plain search tags.
    ref_tags = ["kegg", ref_code, "pathways", "reference", "essential",
                "#uncompressed:%i" % size,
                "#compression:tar.gz",
                "#version:%s" % obiKEGG.KEGGPathway.VERSION,
                "#files:%s" % "!@".join(files)]
    serverFiles.upload("KEGG", ref_archive, ref_archive,
                       title="KEGG Reference pathways (%s)" % ref_code,
                       tags=ref_tags)
    serverFiles.unprotect("KEGG", ref_archive)
134
135
# For every common organism, publish two archives: its genes and its pathways.
for org_code in common_organisms:
    org_name = genome[org_code].definition
    # Archives of essential organisms get an additional "essential" tag.
    essential_tag = ["essential"] if org_code in essential_organisms else []

    ### KEGG Genes
    ##############

    print("KEGGGenes.download(%s)" % org_code)
    obiKEGG.KEGGGenes.download(org_code)

    # Instance is unused; the constructor call is kept from the original
    # script (NOTE(review): confirm whether it has side effects on disk).
    genes = obiKEGG.KEGGGenes(org_code)

    filename = "kegg_genes_%s.tar.gz" % org_code
    # Archive the whole directory that holds the organism's genes file.
    files = [os.path.dirname(obiKEGG.KEGGGenes.filename(org_code))]

    size = tar(filename, add=files)

    print("Uploading %s" % filename)
    serverFiles.upload("KEGG", filename, filename, title="KEGG Genes for " + org_name,
                       tags=["kegg", "genes", org_name, "#uncompressed:%i" % size, "#compression:tar.gz",
                             "#version:%s" % obiKEGG.KEGGGenes.VERSION, "#files:%s" % "!@".join(files)] + essential_tag)
    serverFiles.unprotect("KEGG", filename)

    ### KEGG Pathways
    #################

    print("KEGGPathway.download_pathways(%s)" % org_code)
    obiKEGG.KEGGPathway.download_pathways(org_code)

    filename = "kegg_pathways_%s.tar.gz" % org_code
    png_dir = obiKEGG.KEGGPathway.directory_png(org_code, path="").lstrip("/")
    kgml_dir = obiKEGG.KEGGPathway.directory_kgml(org_code, path="").lstrip("/")
    # The third entry is the kgml path with "metabolic" replaced by
    # "non-metabolic", exactly as the original script derived it.
    files = [png_dir, kgml_dir, kgml_dir.replace("metabolic", "non-metabolic")]

    size = tar(filename, add=files)

    print("Uploading %s" % filename)
    # Tagged "pathways" (previously "genes", copied from the genes upload
    # above) so it matches the tag used by the reference pathway archives.
    serverFiles.upload("KEGG", filename, filename, title="KEGG Pathways for " + org_name,
                       tags=["kegg", "pathways", org_name, "#uncompressed:%i" % size, "#compression:tar.gz",
                             "#version:%s" % obiKEGG.KEGGPathway.VERSION, "#files:%s" % "!@".join(files)] + essential_tag)
    serverFiles.unprotect("KEGG", filename)
176   
177   
### KEGG Brite
###############

def _list_keg_files(url):
    """Return the ".keg" file names found in the directory listing at `url`.

    The name is taken from the last whitespace-separated field of each
    listing line.  Blank lines are skipped (indexing them used to raise
    IndexError) and the connection is closed once the listing is read.
    """
    response = urllib2.urlopen(url)
    try:
        listing = response.read()
    finally:
        response.close()

    names = []
    for line in listing.splitlines():
        fields = line.split()
        if fields and fields[-1].endswith(".keg"):
            names.append(fields[-1])
    return names


brite_ids = _list_keg_files("ftp://ftp.genome.jp/pub/kegg/brite/br/")
ko_brite_ids = _list_keg_files("ftp://ftp.genome.jp/pub/kegg/brite/ko/")

for keg_file in brite_ids + ko_brite_ids:
    # The brite id is the file name up to its first dot.
    brite_id = keg_file.split(".")[0]
    print("KEGGBrite.download(%s)" % brite_id)
    obiKEGG.KEGGBrite.download(brite_id)

files = ["brite/ko/", "brite/br/"]
size = tar("kegg_brite.tar.gz", add=files)

print("Uploading kegg_brite.tar.gz")
serverFiles.upload("KEGG", "kegg_brite.tar.gz", "kegg_brite.tar.gz", title="KEGG Brite",
                   tags=["kegg", "brite", "essential", "#uncompressed:%i" % size,
                         "#compression:tar.gz", "#version:%s" % obiKEGG.KEGGBrite.VERSION, "#files:%s" % "!@".join(files)])
serverFiles.unprotect("KEGG", "kegg_brite.tar.gz")

# Return to the directory the script was started from.
os.chdir(realPath)
Note: See TracBrowser for help on using the repository browser.