Changeset 9634:af72f198600b in orange


Ignore:
Timestamp:
02/05/12 20:38:10 (2 years ago)
Author:
ales_erjavec
Branch:
default
Message:

Update the script to use the new KEGG module (obiKEGG2).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • install-scripts/orngServer/serverUpdateScripts/updateKEGG.py

    r8042 r9634  
    22##!contact=ales.erjavec@fri.uni-lj.si 
    33 
    4 import obiKEGG, obiGene, obiTaxonomy 
    5 import orngServerFiles, orngEnviron 
    6 import os, sys, tarfile, urllib2, shutil 
     4import obiKEGG2, obiGene, obiTaxonomy, obiGeneSets 
     5import os, sys, tarfile, urllib2, shutil, cPickle 
    76from getopt import getopt 
    87 
    9 import obiData 
    10 obiKEGG.borg_class(obiData.FtpDownloader) #To limit the number of connections 
    11  
     8from Orange.misc import serverfiles, ConsoleProgressBar 
     9DOMAIN = "KEGG" 
    1210 
    1311opt = dict(getopt(sys.argv[1:], "u:p:", ["user=", "password="])[0]) 
     
    1614password = opt.get("-p", opt.get("--password", "password")) 
    1715 
    18 tmp_path = os.path.join(orngEnviron.bufferDir, "tmp_KEGG/") 
     16sf = serverfiles.ServerFiles(username, password) 
    1917 
    20 #u = obiKEGG.Update(local_database_path=path) 
    21 serverFiles=orngServerFiles.ServerFiles(username, password) 
     18genome = obiKEGG2.KEGGGenome() 
     19common = genome.common_organisms() 
    2220 
    23 #def output(self, *args, **kwargs): 
    24 #    print args, kwargs 
    25      
    26 #serverFiles = type("bla", (object,), dict(upload=output, unprotect=output))() 
     21rev_taxmap = dict([(v, k) for k, v in genome.TAXID_MAP.items()]) 
    2722 
    2823 
    29 try: 
    30     shutil.rmtree(tmp_path) 
    31 except Exception, ex: 
    32     pass 
    33  
    34 try: 
    35     os.mkdir(tmp_path) 
    36 except Exception, ex: 
    37     pass 
    38  
    39 realPath = os.path.realpath(os.curdir) 
    40 os.chdir(tmp_path) 
    41  
    42 obiKEGG.DEFAULT_DATABASE_PATH = tmp_path 
    43  
    44 uncompressedSize = lambda filename: sum(info.size for info in tarfile.open(filename).getmembers()) 
    45  
    46 def tar(filename, mode="w:gz", add=[]): 
    47     f = tarfile.open(filename, mode) 
    48     for path in add: 
    49         f.add(path) 
    50     f.close() 
    51     return uncompressedSize(filename) 
    52  
    53 print "KEGGGenome.download()" 
    54 obiKEGG.KEGGGenome.download() 
    55  
    56 genome = obiKEGG.KEGGGenome() 
    57          
    58 essential_organisms = genome.essential_organisms() 
    59 common_organisms = genome.common_organisms() 
    60  
    61 files=["genes/genome"] 
    62  
    63 print "Uploading kegg_genome.tar.gz" 
    64  
    65 size = tar("kegg_genome.tar.gz", add=files) 
    66 serverFiles.upload("KEGG", "kegg_genome.tar.gz", "kegg_genome.tar.gz", title="KEGG Genome", 
    67                    tags=["kegg", "genome", "taxonomy", "essential", "#uncompressed:%i" % size, "#compression:tar.gz",  
    68                          "#version:%s" % obiKEGG.KEGGGenome.VERSION, "#files:%s" % "!@".join(files)]) 
    69 serverFiles.unprotect("KEGG", "kegg_genome.tar.gz") 
    70  
    71 print "KEGGEnzymes.download()" 
    72 obiKEGG.KEGGEnzymes.download() 
    73 enzymes = obiKEGG.KEGGEnzymes() 
    74  
    75 print "KEGGCompounds.download()" 
    76 obiKEGG.KEGGCompounds.download() 
    77 compounds = obiKEGG.KEGGCompounds() 
    78  
    79 print "KEGGReactions.download()" 
    80 obiKEGG.KEGGReactions.download() 
    81 reactions = obiKEGG.KEGGReactions() 
    82  
    83 files = ["ligand/enzyme/", "ligand/reaction/", "ligand/compound/"] 
    84 size = tar("kegg_ligand.tar.gz", add=files) 
    85  
    86 print "Uploading kegg_ligand.tar.gz" 
    87 serverFiles.upload("KEGG", "kegg_ligand.tar.gz", "kegg_ligand.tar.gz", title="KEGG Ligand", 
    88                    tags=["kegg", "enzymes", "compunds", "reactions", "essential", "#uncompressed:%i" % size, 
    89                          "#compression:tar.gz", "#version:v1.0", "#files:%s" % "!@".join(files)]) 
    90 serverFiles.unprotect("KEGG", "kegg_ligand.tar.gz") 
    91  
    92  
    93 ### KEGG Reference Pathways 
    94 ############################ 
    95  
    96 print 'KEGGPathway.download_pathways("map")' 
    97 obiKEGG.KEGGPathway.download_pathways("map") 
    98  
    99 files = ["pathway/map/"] 
    100  
    101 size = tar("kegg_pathways_map.tar.gz", add=files) 
    102  
    103 print "Uploading kegg_pathways_map.tar.gz" 
    104 serverFiles.upload("KEGG", "kegg_pathways_map.tar.gz", "kegg_pathways_map.tar.gz", title="KEGG Reference pathways (map)", 
    105                    tags=["kegg", "map", "pathways", "reference", "essential", "#uncompressed:%i" % size, 
    106                          "#compression:tar.gz", "#version:%s" % obiKEGG.KEGGPathway.VERSION, "#files:%s" % "!@".join(files)]) 
    107 serverFiles.unprotect("KEGG", "kegg_pathways_map.tar.gz") 
    108  
    109 print 'KEGGPathway.download_pathways("ec")' 
    110 obiKEGG.KEGGPathway.download_pathways("ec") 
    111  
    112 files = ["pathway/ec/", "xml/kgml/metabolic/ec/"] 
    113  
    114 size = tar("kegg_pathways_ec.tar.gz", add=files) 
    115  
    116 print "Uploading kegg_pathways_ec.tar.gz" 
    117 serverFiles.upload("KEGG", "kegg_pathways_ec.tar.gz", "kegg_pathways_ec.tar.gz", title="KEGG Reference pathways (ec)", 
    118                    tags=["kegg", "ec", "pathways", "reference", "essential", "#uncompressed:%i" % size, 
    119                          "#compression:tar.gz", "#version:%s" % obiKEGG.KEGGPathway.VERSION, "#files:%s" % "!@".join(files)]) 
    120 serverFiles.unprotect("KEGG", "kegg_pathways_ec.tar.gz") 
    121  
    122 print 'KEGGPathway.download_pathways("ko")' 
    123 obiKEGG.KEGGPathway.download_pathways("ko") 
    124  
    125 files = ["pathway/ko/", "xml/kgml/metabolic/ko/", "xml/kgml/non-metabolic/ko/"] 
    126  
    127 size = tar("kegg_pathways_ko.tar.gz", add=files) 
    128  
    129 print "Uploading kegg_pathways_ko.tar.gz" 
    130 serverFiles.upload("KEGG", "kegg_pathways_ko.tar.gz", "kegg_pathways_ko.tar.gz", title="KEGG Reference pathways (ko)", 
    131                    tags=["kegg", "ko", "pathways", "reference", "essential", "#uncompressed:%i" % size, 
    132                          "#compression:tar.gz", "#version:%s" % obiKEGG.KEGGPathway.VERSION, "#files:%s" % "!@".join(files)]) 
    133 serverFiles.unprotect("KEGG", "kegg_pathways_ko.tar.gz") 
    134  
    135  
    136 for org_code in common_organisms: 
    137     org_name = genome[org_code].definition 
     24for org in common: 
    13825     
    139     ### KEGG Genes 
    140     ############## 
     26    ##################### 
     27    # Create gene aliases 
     28    ##################### 
    14129     
    142     print "KEGGGenes.download(%s)" % org_code 
    143     obiKEGG.KEGGGenes.download(org_code) 
    144  
    145     genes = obiKEGG.KEGGGenes(org_code) 
     30#    genes = obiKEGG2.KEGGGenes(org) 
     31#     
     32#    pb = ConsoleProgressBar("Retriving KEGG ids for %r:" % org) 
     33#    genes.pre_cache(progress_callback=pb.set_state) 
     34#    aliases = [] 
     35#    for key, entry in genes.iteritems(): 
     36#        aliases.append(set([key]) | set(entry.alt_names)) 
     37#    pb.finish() 
     38#     
     39#    taxid = obiKEGG2.to_taxid(org) 
     40#    ids_filename = "kegg_gene_id_aliases_" + taxid + ".pickle" 
     41#    filename = serverfiles.localpath(DOMAIN, ids_filename) 
     42#     
     43#    cPickle.dump(aliases, open(filename, "wb")) 
     44#     
     45#    print "Uploading", ids_filename 
     46#    sf.upload(DOMAIN, ids_filename, filename, 
     47#              "KEGG Gene id aliases", 
     48#              tags=["KEGG", "genes", "aliases",  
     49#                    "#version:%s" % obiKEGG2.MatcherAliasesKEGG.VERSION 
     50#                    ], 
     51#              ) 
     52#    sf.unprotect(DOMAIN, ids_filename) 
    14653     
    147     filename = "kegg_genes_%s.tar.gz" % org_code 
    148     files = [os.path.split(obiKEGG.KEGGGenes.filename(org_code))[0]] 
     54    ########################## 
     55    # Create pathway gene sets 
     56    ########################## 
    14957     
    150     size = tar(filename, add=files) 
     58    organism = obiKEGG2.KEGGOrganism(org) 
     59    ge = genome[org] 
    15160     
    152     print "Uploading", filename 
    153     serverFiles.upload("KEGG", filename, filename, title="KEGG Genes for " + org_name, 
    154                        tags=["kegg", "genes", org_name, "#uncompressed:%i" % size, "#compression:tar.gz", 
    155                              "#version:%s" % obiKEGG.KEGGGenes.VERSION, "#files:%s" % "!@".join(files)] + (["essential"] if org_code in essential_organisms else [])) 
    156     serverFiles.unprotect("KEGG", filename) 
     61    taxid = rev_taxmap.get(ge.taxid, ge.taxid) 
     62    gene_sets = obiGeneSets.keggGeneSets(taxid) 
    15763     
    158     ### KEGG Pathways 
    159     ################# 
     64    print "Uploading pathway gene sets for", taxid, "(%s)" % org 
     65    obiGeneSets.register_serverfiles(gene_sets, sf) 
    16066     
    161     print "KEGGPathway.download_pathways(%s)" % org_code 
    162     obiKEGG.KEGGPathway.download_pathways(org_code) 
    163      
    164     filename = "kegg_pathways_%s.tar.gz" % org_code 
    165     files = [obiKEGG.KEGGPathway.directory_png(org_code, path="").lstrip("/"),  
    166              obiKEGG.KEGGPathway.directory_kgml(org_code, path="").lstrip("/"), 
    167              obiKEGG.KEGGPathway.directory_kgml(org_code, path="").lstrip("/").replace("metabolic", "non-metabolic")] 
    168      
    169     size = tar(filename, add=files) 
    170      
    171     print "Uploading", filename 
    172     serverFiles.upload("KEGG", filename, filename, title="KEGG Pathways for " + org_name, 
    173                        tags=["kegg", "genes", org_name, "#uncompressed:%i" % size, "#compression:tar.gz", 
    174                              "#version:%s" % obiKEGG.KEGGPathway.VERSION, "#files:%s" % "!@".join(files)] + (["essential"] if org_code in essential_organisms else [])) 
    175     serverFiles.unprotect("KEGG", filename) 
    176      
    177      
    178 brite_ids = [line.split()[-1] for line in urllib2.urlopen("ftp://ftp.genome.jp/pub/kegg/brite/br/").read().splitlines() if line.split()[-1].endswith(".keg")] 
    179 ko_brite_ids = [line.split()[-1] for line in urllib2.urlopen("ftp://ftp.genome.jp/pub/kegg/brite/ko/").read().splitlines() if line.split()[-1].endswith(".keg")] 
    180  
    181 for id in brite_ids + ko_brite_ids: 
    182     print "KEGGBrite.download(%s)" % id.split(".")[0] 
    183     obiKEGG.KEGGBrite.download(id.split(".")[0]) 
    184      
    185 files = ["brite/ko/", "brite/br/"] 
    186 size = tar("kegg_brite.tar.gz", add=files) 
    187  
    188 print "Uploading kegg_brite.tar.gz" 
    189 serverFiles.upload("KEGG", "kegg_brite.tar.gz", "kegg_brite.tar.gz", title="KEGG Brite", 
    190                    tags=["kegg", "brite", "essential", "#uncompressed:%i" % size, 
    191                          "#compression:tar.gz", "#version:%s" % obiKEGG.KEGGBrite.VERSION, "#files:%s" % "!@".join(files)]) 
    192 serverFiles.unprotect("KEGG", "kegg_brite.tar.gz") 
    193  
    194 os.chdir(realPath) 
Note: See TracChangeset for help on using the changeset viewer.