source: orange-bioinformatics/obiOMIM.py @ 1519:b5ea81f44308

Revision 1519:b5ea81f44308, 3.5 KB checked in by ales_erjavec <ales.erjavec@…>, 2 years ago (diff)

Fixed the ncbi omim download path.

Line 
1import orngServerFiles
2import sys, os
3import urllib2
4import shutil
5import re
6
7from collections import defaultdict
8
class disease(object):
    """ A class representing a disease in the OMIM database

    An instance is built from one line of the morbidmap file.  Only the
    text before the first ``|`` is looked at; it is split into:

    ``name``
        the disease name (surrounding whitespace and the comma that
        separates it from the id removed),
    ``id``
        the 3-6 digit number following the name, as a string, or ``None``
        when the line carries no such number,
    ``mapping``
        the trailing ``(1)``/``(2)``/``(3)``/``(?)`` marker, or two of them
        joined by a single space, or ``None`` when there is none.

    No ``__eq__``/``__hash__`` is defined, so instances compare and hash by
    identity: two objects parsed from identical lines are distinct dict keys.
    """
    regex = re.compile(r'(?P<name>.*?),? (?P<id>[0-9]{3,6} )?(?P<m1>\([123?]\) )?(?P<m2>\([123?]\) )? *$')
    __slots__ = ["name", "id", "mapping"]

    def __init__(self, morbidmap_line):
        """ Parse the disease field of `morbidmap_line`.

        Raises ValueError if the field does not fit the expected layout
        (for instance when it contains an embedded newline).
        """
        string = morbidmap_line.split("|", 1)[0]
        # Every alternative of the pattern ends in a literal space, so a
        # field without trailing blank (or with a stray line terminator)
        # would never match.  Normalising the tail lets such fields parse
        # while leaving the result for already-matching fields unchanged.
        match = self.regex.match(string.rstrip("\r\n") + " ")
        if match is None:
            raise ValueError("Unrecognized morbidmap disease field: %r" % (string,))

        name = match.group("name")
        number = match.group("id")
        self.name = name.strip() if name else name
        self.id = number.strip() if number else number

        # Join whichever mapping markers are present; None when there are none.
        markers = [m.strip() for m in (match.group("m1"), match.group("m2")) if m]
        self.mapping = " ".join(markers) if markers else None

    def __repr__(self):
        return "%s(name=%r, id=%r, mapping=%r)" % (
            type(self).__name__, self.name, self.id, self.mapping)
22       
class OMIM(object):
    """ Access to the disease/gene associations of an OMIM morbidmap file

    The data file is expected as ``morbidmap`` inside `local_database_path`
    and is downloaded from ``MORBIDMAP_URL`` when it is not there yet.
    Every non-empty line is turned into a :class:`disease` object; the
    second ``|``-separated field of a line holds the ", "-separated genes.
    """
    VERSION = 1
    DEFAULT_DATABASE_PATH = orngServerFiles.localpath("OMIM")
    MORBIDMAP_URL = "ftp://ftp.ncbi.nih.gov/repository/OMIM/ARCHIVE/morbidmap"

    def __init__(self, local_database_path=None):
        """ Load the morbidmap file, downloading it first if it is missing.

        `local_database_path` is the directory holding the file; it
        defaults to ``DEFAULT_DATABASE_PATH`` and is created when absent.
        """
        if local_database_path is None:
            local_database_path = self.DEFAULT_DATABASE_PATH
        self.local_database_path = local_database_path

        if not os.path.exists(self.local_database_path):
            os.makedirs(self.local_database_path)

        filename = os.path.join(self.local_database_path, "morbidmap")
        if not os.path.exists(filename):
            self.download_from_NCBI(filename)

        self.load(filename)

    @classmethod
    def download_from_NCBI(cls, file=None):
        """ Download the morbidmap file from ``MORBIDMAP_URL``.

        `file` may be ``None`` (store as ``morbidmap`` inside
        ``DEFAULT_DATABASE_PATH``), a path string, or an object with a
        ``write`` method.  A caller supplied file object is written to but
        not closed.
        """
        if file is None:
            if not os.path.exists(cls.DEFAULT_DATABASE_PATH):
                os.makedirs(cls.DEFAULT_DATABASE_PATH)
            file = os.path.join(cls.DEFAULT_DATABASE_PATH, "morbidmap")

        stream = urllib2.urlopen(cls.MORBIDMAP_URL)
        try:
            if not isinstance(file, basestring):
                shutil.copyfileobj(stream, file)
                return

            # Download next to the target and move into place only once the
            # transfer is complete, so an interrupted download never leaves
            # a truncated "morbidmap" that later runs would take for valid.
            partial = file + ".part"
            try:
                with open(partial, "wb") as out:
                    shutil.copyfileobj(stream, out)
                if os.path.exists(file):
                    # os.rename does not overwrite an existing file on Windows
                    os.remove(file)
                os.rename(partial, file)
            finally:
                # Still present only when something above failed.
                if os.path.exists(partial):
                    os.remove(partial)
        finally:
            stream.close()

    @classmethod
    def get_instance(cls):
        """ Return an OMIM object sharing its state with all other objects
        returned by this method.

        The database is loaded once, on the first call; afterwards every
        returned object uses the very same ``__dict__``.
        """
        if not hasattr(cls, "_shared_dict"):
            cls._shared_dict = OMIM().__dict__
        instance = OMIM.__new__(OMIM)
        instance.__dict__ = cls._shared_dict
        return instance

    def load(self, filename):
        """ Read `filename` and build the {disease object: line} mapping.

        Empty lines are skipped.
        """
        with open(filename, "rb") as stream:
            # splitlines also copes with "\r\n" terminated files
            lines = stream.read().splitlines()
        self._disease_dict = dict((disease(line), line) for line in lines if line)

    def diseases(self):
        """ Return all loaded disease objects.
        """
        return self._disease_dict.keys()

    def genes(self):
        """ Return a sorted list of the distinct genes of all diseases.
        """
        unique = set()
        for entry in self.diseases():
            unique.update(self.disease_genes(entry))
        return sorted(unique)

    def disease_genes(self, disease):
        """ Return the list of genes given on the line `disease` was
        parsed from (its second ``|``-separated field split on ", ").

        `disease` has to be one of the objects returned by `diseases`;
        anything else raises KeyError.
        """
        return self._disease_dict[disease].split("|")[1].split(", ")

    def gene_diseases(self):
        """ Return a defaultdict {gene: set(disease objects for gene), ...}
        """
        d = defaultdict(set)
        for entry in self.diseases():
            for gene in self.disease_genes(entry):
                d[gene].add(entry)
        return d
81   
def diseases():
    """ Return the disease objects held by the shared OMIM instance
    """
    omim = OMIM.get_instance()
    return omim.diseases()
86       
def genes():
    """ Return a sorted list of all distinct genes referenced in OMIM
    (taken from the shared OMIM instance)
    """
    omim = OMIM.get_instance()
    return omim.genes()
91
def disease_genes(disease):
    """ Return the list of genes the shared OMIM instance holds for
    the given disease object
    """
    omim = OMIM.get_instance()
    return omim.disease_genes(disease)
96
def gene_diseases():
    """ Return the mapping {gene: set(disease objects for gene), ...}
    built by the shared OMIM instance
    """
    omim = OMIM.get_instance()
    return omim.gene_diseases()
Note: See TracBrowser for help on using the repository browser.