source: orange-bioinformatics/server_update/updateHomoloGene.py @ 1721:921b3780c6cc

Revision 1721:921b3780c6cc, 3.7 KB checked in by markotoplak, 19 months ago (diff)

Moved common functionality of the server files update scripts to a separate file.

Line 
##!interval=7
##!contact=ales.erjavec@fri.uni-lj.si
# NOTE(review): the two "##!" lines above look like directives for whatever
# schedules the server update scripts -- confirm before moving or editing them.

# Downloads the HomoloGene data file, gzips it and uploads it to the
# "HomoloGene" domain of the file server; the InParanoid part of the script
# follows further down.

from common import *  # expected to provide os, environ and sf_server

from Orange.bio import obiHomoloGene

import gzip
import shutil
from contextlib import closing

# Scratch directory for the downloaded and compressed files.
path = os.path.join(environ.buffer_dir, "tmp_HomoloGene")

try:
    os.mkdir(path)
except OSError:
    # A directory left over from a previous run is fine; any other failure
    # (permissions, missing parent, a plain file in the way) must surface
    # here rather than as a confusing error during the download.
    if not os.path.isdir(path):
        raise

filename = os.path.join(path, "homologene.data")
obiHomoloGene.HomoloGene.download_from_NCBI(filename)

# Size of the plain file, recorded in the upload tags below.
uncompressed = os.stat(filename).st_size

# Copy in binary mode and close both handles even if the copy fails.
# closing() is used because GzipFile only became a context manager in 2.7.
with open(filename, "rb") as plain:
    with closing(gzip.open(filename + ".gz", "wb")) as packed:
        shutil.copyfileobj(plain, packed)

#sf_server.create_domain("HomoloGene")
print("Uploading homologene.data")
sf_server.upload("HomoloGene", "homologene.data", filename + ".gz", title="HomoloGene",
                 tags=["genes", "homologs", "HomoloGene", "#compression:gz",
                       "#uncompressed:%i" % uncompressed,
                       "#version:%i" % obiHomoloGene.HomoloGene.VERSION])
sf_server.unprotect("HomoloGene", "homologene.data")
####
# InParanoid Orthologs update
####

# Organisms whose pairwise InParanoid ortholog files are downloaded.
# Keys look like NCBI taxonomy ids (TODO confirm); only the values are used
# later, as the organism labels in the InParanoid download file names.
# Entries that are commented out are disabled organisms.
organisms = {
    "3702": "A.thaliana",
    "9913": "B.taurus",
    "6239": "C.elegans",
    "3055": "C.reinhardtii",
    "7955": "D.rerio",
    "352472": "D.discoideum",
    "7227": "D.melanogaster",
    "562": "E.coliK12",
    #"11103", # Hepatitis C virus
    "9606": "H.sapiens",
    "10090": "M.musculus",
    #"2104",  # Mycoplasma pneumoniae
    "4530": "O.sativa",
    "5833": "P.falciparum",
    #"4754",  # Pneumocystis carinii
    "10116": "R.norvegicus",
    "4932": "S.cerevisiae",
    "4896": "S.pombe",
    "31033": "T.rubripes",
    #"8355",  # Xenopus laevis
    #"4577",  # Zea mays
}
# urllib2 is the Python 2 module name; fall back to urllib.request, which
# offers the same urlopen() call, when urllib2 is not available.
try:
    import urllib2
except ImportError:
    import urllib.request as urllib2

# Accumulates the (cluster id, taxid, gene symbol) rows of every organism
# pair; the rows are later bulk-inserted into the InParanoid sqlite file.
combined_orthologs = []
def gen(i=0):
    """Yield ``str(i)``, ``str(i + 1)``, ... without end.

    i -- first integer of the sequence (default 0).

    The generator never terminates on its own; callers pull as many
    values as they need.
    """
    while True:
        yield str(i)
        i += 1
from collections import defaultdict
from functools import partial
import time

# Hands out a fresh sequential string id ("0", "1", ...) the first time an
# (org1, org2, cluster id) key is looked up, and the same id afterwards.
# partial(next, ...) is the spelling of gen().next that also works where
# generators have no .next attribute.
unique_cluster_id = defaultdict(partial(next, gen()))

# From here on `organisms` is the sorted list of organism labels, not the
# taxid -> label dict it was built from.
organisms = sorted(organisms.values())

# Visit every unordered pair of organisms exactly once.
for i, org1 in enumerate(organisms):
    for org2 in organisms[i+1:]:
        url = "http://inparanoid.sbc.su.se/download/current/orthoXML/InParanoid.%s-%s.orthoXML" % (org1, org2)
        print(url)
        try:
            stream = urllib2.urlopen(url)
        except Exception as ex:
            # Deliberately best effort: a pair that cannot be fetched is
            # reported and skipped so the remaining pairs are still processed.
            print(ex)
            continue
        try:
            # Build the rows while the stream is still open, in case the
            # parser reads from it lazily.
            orthologs = [(unique_cluster_id[org1, org2, clid], taxid, gene_symbol)
                         for (clid, taxid, gene_symbol)
                         in obiHomoloGene._parseOrthoXML(stream)]
        finally:
            stream.close()

        combined_orthologs.extend(orthologs)
        # Pause between successful downloads.
        time.sleep(10)

# Debugging aid: cache the downloaded rows to skip the slow download loop.
#import cPickle
#cPickle.dump(combined_orthologs, open("orthologs.pck", "wb"))
#combined_orthologs = cPickle.load(open("orthologs.pck"))
import gzip
import shutil
import sqlite3
from contextlib import closing

# Store the collected ortholog rows in a fresh sqlite file, gzip it and
# upload it to the "HomoloGene" domain of the file server.
filename = os.path.join(path, "InParanoid.sqlite")

# closing() makes sure the connection is closed before the file is read back
# for compression, also when one of the statements fails.
with closing(sqlite3.connect(filename)) as con:
    con.execute("drop table if exists homologs")
    con.execute("create table homologs (groupid text, taxid text, geneid text)")
    con.execute("create index group_index on homologs(groupid)")
    con.execute("create index geneid_index on homologs(geneid)")
    con.executemany("insert into homologs values (?, ?, ?)", combined_orthologs)
    con.commit()

# Close both handles even if the copy fails. closing() is used because
# GzipFile only became a context manager in Python 2.7.
with open(filename, "rb") as db_file:
    with closing(gzip.GzipFile(filename + ".gz", "wb")) as gz_file:
        shutil.copyfileobj(db_file, gz_file)

print("Uploading InParanoid.sqlite")
sf_server.upload("HomoloGene", "InParanoid.sqlite", filename + ".gz", title="InParanoid: Eukaryotic Ortholog Groups",
                 tags=["genes", "homologs", "orthologs", "InParanoid", "#compression:gz",
                       "#uncompressed:%i" % os.stat(filename).st_size,
                       "#version:%i" % obiHomoloGene.InParanoid.VERSION])
sf_server.unprotect("HomoloGene", "InParanoid.sqlite")
Note: See TracBrowser for help on using the repository browser.