source: orange-bioinformatics/orangecontrib/bio/obiOMIM.py @ 1873:0810c5708cc5

Revision 1873:0810c5708cc5, 3.1 KB checked in by Ales Erjavec <ales.erjavec@…>, 7 months ago (diff)

Moved '_bioinformatics' into orangecontrib namespace.

Line 
1import sys, os
2import urllib2
3import shutil
4import re
5import Orange
6
7from collections import defaultdict
8
class disease(object):
    """A disease entry parsed from one line of an OMIM ``morbidmap`` file.

    Only the first ``|``-separated field of the line is parsed.  It is
    split into three attributes:

    ``name``
        The text preceding the optional number and mapping key(s).
    ``id``
        The 3-6 digit number following the name, as a string, or ``None``
        when the field has no such number.
    ``mapping``
        The parenthesised key(s), e.g. ``"(3)"``; two keys are joined by a
        single space.  ``None`` when the field has none.

    The class defines neither ``__eq__`` nor ``__hash__``, so instances
    compare and hash by identity.
    """
    regex = re.compile(r'(?P<name>.*?),? (?P<id>[0-9]{3,6} )?(?P<m1>\([123?]\) )?(?P<m2>\([123?]\) )? *$')
    __slots__ = ["name", "id", "mapping"]

    def __init__(self, morbidmap_line):
        """Parse ``morbidmap_line``.

        :param morbidmap_line: one line of the ``morbidmap`` file.
        :raises ValueError: if the disease field cannot be parsed.
        """
        string = morbidmap_line.split("|", 1)[0]
        match = self.regex.match(string)
        if match is None:
            # Every component of the pattern ends in a literal space, so a
            # field without trailing whitespace can never match as-is.
            # Retry with one padding space instead of failing on it.
            match = self.regex.match(string + " ")
        if match is None:
            raise ValueError("Unrecognized morbidmap line: %r" % (morbidmap_line,))
        # groups()[:3] is (name, id, m1); empty/None groups are kept as-is.
        self.name, self.id, self.mapping = [s.strip() if s else s for s in match.groups()[:3]]
        if match.group("m2"):
            # m2 can only match after m1 did, so self.mapping is a string here.
            self.mapping += " " + match.group("m2").strip()
22                                                                               
class OMIM(object):
    """Disease/gene associations read from an OMIM ``morbidmap`` file.

    The file is read once on construction.  Each non-empty line becomes a
    ``disease`` object that is mapped to the raw line it was parsed from.
    """
    VERSION = 1
    DEFAULT_DATABASE_PATH = Orange.utils.serverfiles.localpath("OMIM")

    def __init__(self, local_database_path=None):
        """Locate and load the ``morbidmap`` file.

        :param local_database_path: directory containing a ``morbidmap``
            file.  When omitted (or equal to ``DEFAULT_DATABASE_PATH``) the
            file path is obtained from
            ``Orange.utils.serverfiles.localpath_download``.
        """
        if local_database_path is None:
            local_database_path = self.DEFAULT_DATABASE_PATH
        self.local_database_path = local_database_path

        if self.local_database_path == self.DEFAULT_DATABASE_PATH:
            filename = Orange.utils.serverfiles.localpath_download("OMIM", "morbidmap")
        else:
            filename = os.path.join(self.local_database_path, "morbidmap")

        self.load(filename)

    @classmethod
    def download_from_NCBI(cls, file=None):
        """Copy the ``morbidmap`` file from the NCBI FTP server into ``file``.

        :param file: a file name (opened for binary writing) or a writable
            file-like object.  It is always closed before returning.
        :raises TypeError: if ``file`` is not given.
        """
        if file is None:
            # Fail before touching the network; nothing could be written.
            raise TypeError("download_from_NCBI() requires a file name or a writable file object")
        if isinstance(file, basestring):
            file = open(file, "wb")
        try:
            stream = urllib2.urlopen("ftp://ftp.ncbi.nih.gov/repository/OMIM/ARCHIVE/morbidmap")
            try:
                # Use shutil's default buffer size; copying 10 bytes at a
                # time costs one read/write pair per 10 bytes.
                shutil.copyfileobj(stream, file)
            finally:
                stream.close()
        finally:
            file.close()

    @classmethod
    def get_instance(cls):
        """Return an ``OMIM`` object backed by state shared across calls.

        The database is loaded on the first call.  Every call returns a new
        object whose ``__dict__`` is that same shared dictionary.
        """
        if not hasattr(cls, "_shared_dict"):
            omim = OMIM()
            cls._shared_dict = omim.__dict__
        instance = OMIM.__new__(OMIM)
        instance.__dict__ = cls._shared_dict
        return instance

    def load(self, filename):
        """Read ``filename`` and build the ``{disease: raw line}`` mapping."""
        # The context manager guarantees the handle is closed even if
        # reading fails.
        with open(filename, "rb") as handle:
            lines = handle.read().splitlines()
        self._disease_dict = dict((disease(line), line) for line in lines if line)

    def diseases(self):
        """Return all loaded ``disease`` objects."""
        return self._disease_dict.keys()

    def genes(self):
        """Return a sorted list of the distinct genes of all diseases."""
        # Collect straight into a set; concatenating the per-disease lists
        # one by one is quadratic in the total number of genes.
        unique = set(gene
                     for entry in self.diseases()
                     for gene in self.disease_genes(entry))
        return sorted(unique)

    def disease_genes(self, disease):
        """Return the list of genes listed on the line of ``disease``.

        The genes are the second ``|``-separated field of the raw line,
        split on ``", "``.

        :param disease: a ``disease`` object returned by :meth:`diseases`.
        :raises KeyError: if ``disease`` is not one of the loaded objects.
        """
        return self._disease_dict[disease].split("|")[1].split(", ")

    def gene_diseases(self):
        """Return a ``defaultdict`` mapping each gene to a set of diseases."""
        d = defaultdict(set)
        for entry in self.diseases():
            for gene in self.disease_genes(entry):
                d[gene].add(entry)
        return d
73
def diseases():
    """Return every disease object held by the shared OMIM instance."""
    shared = OMIM.get_instance()
    return shared.diseases()
78
def genes():
    """Return a sorted list of the distinct genes referenced in OMIM.

    Delegates to ``genes()`` of the shared OMIM instance.
    """
    shared = OMIM.get_instance()
    return shared.genes()
83
def disease_genes(disease):
    """Return the list of genes that OMIM references for ``disease``.

    ``disease`` is looked up in the shared OMIM instance.
    """
    shared = OMIM.get_instance()
    return shared.disease_genes(disease)
88
def gene_diseases():
    """Return a mapping ``{gene: set(disease objects for gene), ...}``.

    Built by the shared OMIM instance.
    """
    shared = OMIM.get_instance()
    return shared.gene_diseases()
93
Note: See TracBrowser for help on using the repository browser.