source: orange-bioinformatics/server_update/updateNCBI_geneinfo.py @ 1719:6e8861564778

Revision 1719:6e8861564778, 2.4 KB checked in by markotoplak, 20 months ago (diff)

Fixed update scripts for MeSH, GO, HomoloGene, NCBI_geneinfo, OMIM, PPI. Moved wget to Orange.utils.

Line 
##interval:7
import errno
import os
import sys
from getopt import getopt
from gzip import GzipFile

import Orange.utils.environ as orngEnviron
import Orange.utils.serverfiles as orngServerFiles
from Orange.bio import obiGene, obiTaxonomy
8
# Command-line options: -u/--user and -p/--password are the credentials
# later handed to the file server.
opt = dict(getopt(sys.argv[1:], "u:p:", ["user=", "password="])[0])

# Scratch directory holding the downloaded dumps and the per-taxon files.
tmpdir = os.path.join(orngEnviron.buffer_dir, "tmp_NCBIGene_info")
try:
    os.mkdir(tmpdir)
except OSError as ex:
    # A directory left over from a previous run is expected; any other
    # failure (permissions, missing parent, path is a regular file) would
    # only resurface later as a confusing I/O error, so report it now.
    if ex.errno != errno.EEXIST or not os.path.isdir(tmpdir):
        raise

username = opt.get("-u", opt.get("--user", "username"))
password = opt.get("-p", opt.get("--password", "password"))

gene_info_filename = os.path.join(tmpdir, "gene_info")
gene_history_filename = os.path.join(tmpdir, "gene_history")

# Fetch the NCBI dumps into the two paths above; they are read back from
# those paths further down in this script.
obiGene.NCBIGeneInfo.get_geneinfo_from_ncbi(gene_info_filename)
obiGene.NCBIGeneInfo.get_gene_history_from_ncbi(gene_history_filename)
25
def _group_lines_by_taxid(lines, taxids):
    """Bucket tab-separated *lines* by the value of their first column.

    Returns a dict that maps every id in *taxids* to the list of stripped
    lines whose first tab-delimited field equals that id.  Lines that
    belong to other ids, or that contain no tab at all, are dropped.
    """
    grouped = dict((taxid, []) for taxid in taxids)
    for line in lines:
        # One split plus a dict lookup replaces a startswith() scan over
        # every taxid; requiring the tab keeps tab-less lines excluded.
        taxid, tab, _rest = line.partition("\t")
        if tab and taxid in grouped:
            grouped[taxid].append(line.strip())
    return grouped


taxids = obiGene.NCBIGeneInfo.common_taxids()
essential = obiGene.NCBIGeneInfo.essential_taxids()

# Context managers make sure both dump files are closed once consumed.
with open(gene_info_filename, "rb") as info:
    genes = _group_lines_by_taxid(info, taxids)

with open(gene_history_filename, "rb") as hist:
    history = _group_lines_by_taxid(hist, taxids)
41
42       
sf = orngServerFiles.ServerFiles(username, password)


def _publish(basename, taxid, lines, title, tags):
    """Write *lines* to a per-taxon file and upload it to "NCBI_geneinfo".

    The file is named "<basename>.<taxid>.db", created inside ``tmpdir``
    with the lines joined by newlines, uploaded under the same name with
    the given *title* and *tags*, and then unprotected on the server.
    """
    server_name = "%s.%s.db" % (basename, taxid)
    filename = os.path.join(tmpdir, server_name)
    # The with-block closes (and thereby flushes) the file before upload,
    # even if the write fails part-way.
    with open(filename, "wb") as out:
        out.write("\n".join(lines))
    print("Uploading %s" % filename)
    sf.upload("NCBI_geneinfo", server_name, filename, title=title, tags=tags)
    sf.unprotect("NCBI_geneinfo", server_name)


# Use a separate loop name so the ``genes`` dict is not rebound to one of
# its own values while being iterated.
for taxid, gene_lines in genes.items():
    organism = obiTaxonomy.name(taxid)
    essential_tag = ["essential"] if taxid in essential else []

    _publish("gene_info", taxid, gene_lines,
             "NCBI gene info for %s" % organism,
             ["NCBI", "gene info", "gene_names", organism] + essential_tag)

    _publish("gene_history", taxid, history.get(taxid, []),
             "NCBI gene history for %s" % organism,
             ["NCBI", "gene info", "history", "gene_names", organism]
             + essential_tag)
Note: See TracBrowser for help on using the repository browser.