source: orange-bioinformatics/server_update/updateHomoloGene.py @ 1719:6e8861564778

Revision 1719:6e8861564778, 4.1 KB checked in by markotoplak, 20 months ago (diff)

Fixed update scripts for MeSH, GO, HomoloGene, NCBI_geneinfo, OMIM, PPI. Moved wget to Orange.utils.

Line 
1##!interval=7
2##!contact=ales.erjavec@fri.uni-lj.si
3
import contextlib
import gzip
import os
import shutil
import sys
from getopt import getopt

import Orange.utils.environ as orngEnviron
import Orange.utils.serverfiles as orngServerFiles
from Orange.bio import obiHomoloGene
12
# Command line: -u/--user and -p/--password carry the server credentials.
# getopt returns (options, remaining_args); only the options are kept, as a
# dict keyed by the flag exactly as it was typed ("-u" or "--user").
opt = dict(getopt(sys.argv[1:], "u:p:", ["user=", "password="])[0])

# The short flag wins over the long one; the literal strings "username" and
# "password" are the fall-backs when neither form is given.
username = opt.get("-u", opt.get("--user", "username"))
password = opt.get("-p", opt.get("--password", "password"))

# Scratch directory for the downloaded and compressed files.
path = os.path.join(orngEnviron.buffer_dir, "tmp_HomoloGene")
serverFiles = orngServerFiles.ServerFiles(username, password)

try:
    os.mkdir(path)
except OSError:
    # An already existing directory is fine; anything else (permissions,
    # missing parent, a regular file in the way) must not be hidden, because
    # every later step writes into this directory.
    if not os.path.isdir(path):
        raise
# Download the HomoloGene data file, gzip it and upload it to the
# "HomoloGene" server-files domain.
filename = os.path.join(path, "homologene.data")
obiHomoloGene.HomoloGene.download_from_NCBI(filename)
# Size of the raw download, published below in the "#uncompressed" tag.
uncompressed = os.stat(filename).st_size

# Copy in binary mode so the compressed payload is byte-identical to the
# download on every platform, and close both handles even if the copy fails.
with open(filename, "rb") as src:
    with contextlib.closing(gzip.open(filename + ".gz", "wb")) as dst:
        shutil.copyfileobj(src, dst)

# NOTE(review): domain creation is disabled -- presumably the "HomoloGene"
# domain already exists on the server; confirm before a first-time run.
#serverFiles.create_domain("HomoloGene")
print("Uploading homologene.data")
serverFiles.upload(
    "HomoloGene", "homologene.data", filename + ".gz",
    title="HomoloGene",
    tags=["genes", "homologs", "HomoloGene", "#compression:gz",
          "#uncompressed:%i" % uncompressed,
          "#version:%i" % obiHomoloGene.HomoloGene.VERSION])
serverFiles.unprotect("HomoloGene", "homologene.data")
40
####
# InParanoid orthologs update
####
44
# Taxonomy id -> organism name. The names are the ones substituted into the
# InParanoid orthoXML download URLs further down in this script.
#
# Ids that were listed but disabled (no organism name was ever given):
#   11103  Hepatitis C virus
#   2104   Mycoplasma pneumoniae
#   4754   Pneumocystis carinii
#   8355   Xenopus laevis
#   4577   Zea mays
organisms = {
    "3702": "A.thaliana",
    "9913": "B.taurus",
    "6239": "C.elegans",
    "3055": "C.reinhardtii",
    "7955": "D.rerio",
    "352472": "D.discoideum",
    "7227": "D.melanogaster",
    "562": "E.coliK12",
    "9606": "H.sapiens",
    "10090": "M.musculus",
    "4530": "O.sativa",
    "5833": "P.falciparum",
    "10116": "R.norvegicus",
    "4932": "S.cerevisiae",
    "4896": "S.pombe",
    "31033": "T.rubripes",
}
67
68import urllib2
# Rows accumulated from every organism-pair download; each row is a
# (cluster id, taxonomy id, gene symbol) tuple.
combined_orthologs = []


def gen(i=0):
    """Yield str(i), str(i + 1), str(i + 2), ... without end."""
    while True:
        yield str(i)
        i += 1


from collections import defaultdict

# Hands out a fresh id string ("0", "1", ...) the first time a key is looked
# up and returns that same id on every later lookup of the same key.
# The builtin next() is used instead of the generator's ``.next`` method,
# which exists only on Python 2 generators.
_cluster_id_source = gen()
unique_cluster_id = defaultdict(lambda: next(_cluster_id_source))
78         
# From here on ``organisms`` is the sorted list of organism names; the
# taxonomy-id keys of the mapping are not used by the download loop.
organisms = sorted(organisms.values())

import time

# One orthoXML file is requested per unordered organism pair.
INPARANOID_URL = ("http://inparanoid.sbc.su.se/download/current/orthoXML/"
                  "InParanoid.%s-%s.orthoXML")

for i, org1 in enumerate(organisms):
    # Pair org1 only with names sorted after it, so each pair is fetched once.
    for org2 in organisms[i + 1:]:
        url = INPARANOID_URL % (org1, org2)
        print(url)
        try:
            stream = urllib2.urlopen(url)
        except Exception as ex:
            # Best effort: report the failed pair and move on to the next one.
            print(ex)
            continue
        # Keep the response open while it is parsed and consumed, then close
        # it so connections do not pile up over the many requests.
        with contextlib.closing(stream):
            parsed = obiHomoloGene._parseOrthoXML(stream)
            # Cluster ids are only unique within one file; key them by the
            # organism pair as well to get ids unique across all downloads.
            orthologs = [(unique_cluster_id[org1, org2, clid], taxid, gene_symbol)
                         for (clid, taxid, gene_symbol) in parsed]

        combined_orthologs.extend(orthologs)
        # Pause between successful downloads.
        time.sleep(10)
95       
96#import cPickle
97#cPickle.dump(combined_orthologs, open("orthologs.pck", "wb"))
98#combined_orthologs = cPickle.load(open("orthologs.pck"))
99
import sqlite3

# Store the combined ortholog rows in a fresh SQLite database, gzip the
# database file and upload it to the "HomoloGene" server-files domain.
filename = os.path.join(path, "InParanoid.sqlite")

# Close the connection before the file is read back for compression.
with contextlib.closing(sqlite3.connect(filename)) as con:
    con.execute("drop table if exists homologs")
    con.execute("create table homologs (groupid text, taxid text, geneid text)")
    con.executemany("insert into homologs values (?, ?, ?)", combined_orthologs)
    # Build the indexes after the bulk insert so they are created in one pass
    # rather than updated for every inserted row.
    con.execute("create index group_index on homologs(groupid)")
    con.execute("create index geneid_index on homologs(geneid)")
    con.commit()

# Both handles are closed even if the copy fails.
with open(filename, "rb") as src:
    with contextlib.closing(gzip.GzipFile(filename + ".gz", "wb")) as dst:
        shutil.copyfileobj(src, dst)

print("Uploading InParanoid.sqlite")
serverFiles.upload(
    "HomoloGene", "InParanoid.sqlite", filename + ".gz",
    title="InParanoid: Eukaryotic Ortholog Groups",
    tags=["genes", "homologs", "orthologs", "InParanoid", "#compression:gz",
          "#uncompressed:%i" % os.stat(filename).st_size,
          "#version:%i" % obiHomoloGene.InParanoid.VERSION])
serverFiles.unprotect("HomoloGene", "InParanoid.sqlite")
124       
125       
126           
Note: See TracBrowser for help on using the repository browser.