source: orange-bioinformatics/_bioinformatics/obiOMIM.py @ 1795:663192bd4469

Revision 1795:663192bd4469, 3.6 KB checked in by Flashpoint <vid.flashpoint@…>, 11 months ago (diff)

Removed obiCytobands.py and created genesets in obiGeneSets.py. updateCytobands.py saves only genesets now

Line 
1import sys, os
2import urllib2
3import shutil
4import re
5
6from collections import defaultdict
7
8from Orange.orng import orngServerFiles
9
class disease(object):
    """ A class representing a disease in the OMIM database

    Instances are built from one line of the ``morbidmap`` file. Only the
    text before the first ``|`` of that line is interpreted here.

    Attributes:
        name: the disease name (leading part of the entry, stripped).
        id: the 3 to 6 digit identifier as a string, or None if the entry
            carries none.
        mapping: the parenthesised marker(s) at the end of the entry, e.g.
            ``"(3)"`` or ``"(3) (1)"``, or None if the entry carries none.
            NOTE(review): presumably the OMIM phenotype mapping key - confirm.
    """
    regex = re.compile(r'(?P<name>.*?),? (?P<id>[0-9]{3,6} )?(?P<m1>\([123?]\) )?(?P<m2>\([123?]\) )? *$')
    __slots__ = ["name", "id", "mapping"]

    def __init__(self, morbidmap_line):
        """ Parse the disease field of `morbidmap_line`.

        :param morbidmap_line: one ``|``-separated line of the morbidmap file.
        :raises ValueError: if the disease field does not match ``regex``.
        """
        string = morbidmap_line.split("|", 1)[0]
        match = self.regex.match(string)
        if match is None:
            # Fail with the offending text instead of an opaque
            # AttributeError on None.
            raise ValueError("Unrecognized morbidmap disease entry: %r" % (string,))

        # Unmatched optional groups are None and must be left as None.
        name, omim_id, mapping = [s.strip() if s else s
                                  for s in match.group("name", "id", "m1")]
        second_mapping = match.group("m2")
        if second_mapping:
            mapping += " " + second_mapping.strip()

        self.name, self.id, self.mapping = name, omim_id, mapping
23       
class OMIM(object):
    """ Access to the disease/gene associations of the OMIM ``morbidmap`` file.

    The file is cached in a local directory and downloaded from NCBI the
    first time it is needed. Each non-empty line is parsed into a `disease`
    object which is mapped to its raw line.
    """
    VERSION = 1
    DEFAULT_DATABASE_PATH = orngServerFiles.localpath("OMIM")
    # Single place for the download location used by every code path.
    MORBIDMAP_URL = "ftp://ftp.ncbi.nih.gov/repository/OMIM/ARCHIVE/morbidmap"

    def __init__(self, local_database_path=None):
        """ Load the morbidmap file, downloading it first if it is missing.

        :param local_database_path: directory holding the ``morbidmap`` file;
            defaults to ``DEFAULT_DATABASE_PATH``. Created if it does not exist.
        """
        if local_database_path is None:
            local_database_path = self.DEFAULT_DATABASE_PATH
        self.local_database_path = local_database_path

        if not os.path.exists(self.local_database_path):
            os.makedirs(self.local_database_path)

        filename = os.path.join(self.local_database_path, "morbidmap")
        if not os.path.exists(filename):
            # The data is fetched completely before the file is created, so
            # an interrupted download does not leave a truncated cache file
            # that a later run would treat as complete.
            self.download_from_NCBI(filename)

        self.load(filename)

    @classmethod
    def download_from_NCBI(cls, file=None):
        """ Download the morbidmap file from NCBI.

        :param file: where to store the data. None writes to ``morbidmap``
            inside ``DEFAULT_DATABASE_PATH`` (created if missing), a string
            is used as a file name, and anything else is treated as an open
            file-like object. The target is closed after writing in all
            three cases, including a file object passed in by the caller.
        """
        stream = urllib2.urlopen(cls.MORBIDMAP_URL)
        try:
            data = stream.read()
        finally:
            stream.close()

        if file is None:
            if not os.path.exists(cls.DEFAULT_DATABASE_PATH):
                os.makedirs(cls.DEFAULT_DATABASE_PATH)
            file = open(os.path.join(cls.DEFAULT_DATABASE_PATH, "morbidmap"), "wb")
        elif isinstance(file, basestring):
            file = open(file, "wb")

        try:
            file.write(data)
        finally:
            file.close()

    @classmethod
    def get_instance(cls):
        """ Return an instance sharing its state with all other instances
        returned by this method.

        The first call builds a regular ``OMIM()`` (loading the default
        database); later calls bypass ``__init__`` and reuse that instance's
        attribute dictionary.
        """
        if not hasattr(cls, "_shared_dict"):
            omim = OMIM()
            cls._shared_dict = omim.__dict__
        instance = OMIM.__new__(OMIM)
        instance.__dict__ = cls._shared_dict
        return instance

    def load(self, filename):
        """ Parse `filename` and (re)build the disease table.

        :param filename: path of a morbidmap file; empty lines are skipped.
        """
        with open(filename, "rb") as stream:
            lines = stream.read().splitlines()
        self._disease_dict = dict((disease(line), line) for line in lines if line)

    def diseases(self):
        """ Return a list of all loaded `disease` objects.
        """
        return list(self._disease_dict)

    def genes(self):
        """ Return a sorted list of all distinct genes referenced by any
        loaded disease.
        """
        all_genes = set()
        for entry in self.diseases():
            all_genes.update(self.disease_genes(entry))
        return sorted(all_genes)

    def disease_genes(self, disease):
        """ Return the list of genes listed for `disease`.

        :param disease: a key of the loaded table, i.e. one of the objects
            returned by `diseases`.
        :raises KeyError: if `disease` is not in the loaded table.
        """
        # The second "|"-separated field of the raw line holds the genes,
        # separated by ", ".
        return self._disease_dict[disease].split("|")[1].split(", ")

    def gene_diseases(self):
        """ Return a ``defaultdict`` mapping each gene to the set of
        `disease` objects that list it.
        """
        d = defaultdict(set)
        for entry in self.diseases():
            for gene in self.disease_genes(entry):
                d[gene].add(entry)
        return d
84   
def diseases():
    """ Return the disease objects of the shared OMIM instance
    (the result of ``OMIM.get_instance().diseases()``).
    """
    omim = OMIM.get_instance()
    return omim.diseases()
89       
def genes():
    """ Return a sorted list of all genes referenced in OMIM
    (the result of ``OMIM.get_instance().genes()``).
    """
    omim = OMIM.get_instance()
    return omim.genes()
94
def disease_genes(disease):
    """ Return the list of genes referenced by `disease` in OMIM
    (the result of ``OMIM.get_instance().disease_genes(disease)``).
    """
    omim = OMIM.get_instance()
    return omim.disease_genes(disease)
99
def gene_diseases():
    """ Return a dictionary {gene: set(disease_objects for gene), ...}
    built by the shared OMIM instance.
    """
    omim = OMIM.get_instance()
    return omim.gene_diseases()
104
if __name__ == "__main__":
    # Script entry point: print the sorted list of all OMIM genes.
    all_genes = genes()
    print(all_genes)
Note: See TracBrowser for help on using the repository browser.