# Source: orange-bioinformatics/orangecontrib/bio/obiDictyMutants.py @ 1919:8de40fbc5a87
#
# Revision 1919:8de40fbc5a87, 7.1 KB, checked in by blaz <blaz.zupan@…>, 5 months ago (diff)
#
# Commit message: Some revision of rst documentation.

import contextlib
import os
import pickle
import shutil
import urllib2

from collections import defaultdict

from Orange.orng import orngServerFiles
from Orange.utils.serverfiles import localpath_download

# Server-files domain passed to ``localpath_download`` when fetching the
# pickled mutant collection.
domain = "dictybase"
# File name of the pickled mutant collection inside ``domain``.
pickle_file = "mutants.pkl"
# Descriptive tags for the data set. Not referenced elsewhere in this file;
# presumably consumed by the server-files update tooling -- TODO confirm.
tags = ["Dictyostelium discoideum", "mutant", "dictyBase", "phenotype"]


class DictyMutant(object):
    """
    A class representing a single Dictyostelium discoideum mutant
    from Dictybase

    :param mutant_entry: A single mutant entry from
        dictyBase's `all curated mutants file <http://dictybase.org/db/cgi-bin/dictyBase/download/download.pl?area=mutant_phenotypes&ID=all-mutants.txt>`_ (updated monthly).
        The entry is one tab-separated line whose first four fields are
        the ID, the strain descriptor, the genes and the phenotypes; the
        latter two are themselves ``" | "``-separated lists.
    :type mutant_entry: str

    :raises IndexError: if the entry has fewer than four tab-separated
        fields.

    :ivar DictyMutant.name: dictyBase ID for a mutant
    :ivar DictyMutant.descriptor: dictyBase strain descriptor of a mutant
    :ivar DictyMutant.genes: all of the mutant's associated genes
    :ivar DictyMutant.phenotypes: all of the mutant's associated phenotypes
    :ivar DictyMutant.null: category flag, ``False`` until set by the caller
    :ivar DictyMutant.overexp: category flag, ``False`` until set by the caller
    :ivar DictyMutant.multiple: category flag, ``False`` until set by the caller
    :ivar DictyMutant.develop: category flag, ``False`` until set by the caller
    :ivar DictyMutant.other: category flag, ``False`` until set by the caller

    """
    def __init__(self, mutant_entry):
        fields = mutant_entry.split("\t")
        self.name = fields[0]
        self.descriptor = fields[1]
        self.genes = fields[2].split(" | ")
        self.phenotypes = fields[3].split(" | ")
        # Category membership is not part of the entry itself; every flag
        # starts out False and is switched on by whoever knows the category.
        self.null = False
        self.overexp = False
        self.multiple = False
        self.develop = False
        self.other = False

    def __repr__(self):
        return "%s(name=%r, descriptor=%r)" % (
            type(self).__name__, self.name, self.descriptor)


class DictyMutants(object):
    """
    A class representing the collection of all Dictybase mutants as
    a dictionary of `DictyMutant` objects

    :param local_database_path: A user defined path for storing D. discoideum mutants objects in a file. If `None` then a default database path is used.

    """

    VERSION = 1
    # Default local folder used for the files downloaded from dictyBase.
    DEFAULT_DATABASE_PATH = orngServerFiles.localpath("DictyMutants")

    # dictyBase category files paired with the `DictyMutant` flag attribute
    # that membership in the file switches on.
    _CATEGORY_FILES = (
        ("null-mutants.txt", "null"),
        ("overexpression-mutants.txt", "overexp"),
        ("multiple-mutants.txt", "multiple"),
        ("developmental-mutants.txt", "develop"),
        ("other-mutants.txt", "other"),
    )

    def __init__(self, local_database_path=None):
        """
        Ensure the local database folder exists and load the pickled
        mutant collection obtained through ``localpath_download``.
        """
        if local_database_path is None:
            local_database_path = self.DEFAULT_DATABASE_PATH
        self.local_database_path = local_database_path

        if not os.path.exists(self.local_database_path):
            # makedirs also creates missing parent folders.
            os.makedirs(self.local_database_path)

        # NOTE(security): unpickling executes code embedded in the file, so
        # the file returned by localpath_download must come from a trusted
        # source.
        with open(localpath_download(domain, pickle_file), "rb") as stream:
            self._mutants = pickle.load(stream)

    def update_file(self, name):
        """
        Download the dictyBase mutant file `name` into the local database
        folder and return the path of the downloaded file.

        The data is first written to ``name + "_temp"`` and then renamed,
        so an interrupted download never leaves a truncated `name` behind.
        """
        url = "http://dictybase.org/db/cgi-bin/dictyBase/download/download.pl?area=mutant_phenotypes&ID="
        filename = os.path.join(self.local_database_path, name)
        temp_file = os.path.join(self.local_database_path, name + "_temp")

        # The urllib2 response is not a context manager; closing() makes
        # sure the connection is released even if the copy fails.
        with contextlib.closing(urllib2.urlopen(url + name)) as stream:
            with open(temp_file, "wb") as out:
                shutil.copyfileobj(stream, out)

        # NOTE(review): os.rename raises OSError on Windows when `filename`
        # already exists -- confirm whether Windows needs to be supported.
        os.rename(temp_file, filename)
        return filename

    def load_mutants(self, file):
        """
        Return the lines of the text file at path `file` as a list of
        strings without line terminators.
        """
        with open(file) as data:
            return data.read().splitlines()

    def download_mutants(self):
        """
        Download all dictyBase mutant files and return a dictionary of
        `DictyMutant` objects with their category flags set.

        Every mutant listed in ``all-mutants.txt`` becomes one object; a
        flag is switched on when the mutant's name also occurs in the
        corresponding category file.
        """
        all_mutants = self.load_mutants(self.update_file("all-mutants.txt"))
        # Fetch every category file before parsing anything, so that all
        # downloads happen up front.
        category_lines = [
            (flag, self.load_mutants(self.update_file(name)))
            for name, flag in self._CATEGORY_FILES]

        _mutants = [DictyMutant(line) for line in all_mutants]

        for flag, lines in category_lines:
            names = {DictyMutant(line).name for line in lines}
            for mutant in _mutants:
                if mutant.name in names:
                    setattr(mutant, flag, True)

        # Each mutant object is stored as both key and value; the lookup
        # methods of this class index the dictionary by mutant object.
        return {mutant: mutant for mutant in _mutants}

    def pickle_data(self):
        """
        Return freshly downloaded mutants pickled with the highest
        protocol available.
        """
        return pickle.dumps(self.download_mutants(), -1)

    @classmethod
    def get_instance(cls):
        """
        Return an instance that shares its attribute dictionary with all
        other instances returned by this method.

        The first call constructs a regular `DictyMutants` object (which
        loads the pickle) and caches its ``__dict__`` on the class; later
        calls reuse that dictionary without running ``__init__`` again.
        """
        if not hasattr(cls, "_shared_dict"):
            dicty = DictyMutants()
            cls._shared_dict = dicty.__dict__
        instance = DictyMutants.__new__(DictyMutants)
        instance.__dict__ = cls._shared_dict
        return instance

    def mutants(self):
        """ Return a list of the keys of the loaded mutant collection
        """
        return list(self._mutants)

    def genes(self):
        """ Return a sorted list of all distinct genes of all mutants
        """
        return sorted({gene
                       for mutant in self.mutants()
                       for gene in self.mutant_genes(mutant)})

    def phenotypes(self):
        """ Return a sorted list of all distinct phenotypes of all mutants
        """
        return sorted({phenotype
                       for mutant in self.mutants()
                       for phenotype in self.mutant_phenotypes(mutant)})

    def mutant_genes(self, mutant):
        """ Return the ``genes`` attribute of the entry stored for `mutant`
        """
        return self._mutants[mutant].genes

    def mutant_phenotypes(self, mutant):
        """ Return the ``phenotypes`` attribute of the entry stored for `mutant`
        """
        return self._mutants[mutant].phenotypes

    def gene_mutants(self):
        """ Return a defaultdict {gene: set(mutants having that gene), ...}
        """
        dgm = defaultdict(set)
        for mutant in self.mutants():
            for gene in self.mutant_genes(mutant):
                dgm[gene].add(mutant)
        return dgm

    def phenotype_mutants(self):
        """ Return a defaultdict {phenotype: set(mutants having that phenotype), ...}
        """
        dpm = defaultdict(set)
        for mutant in self.mutants():
            for phenotype in self.mutant_phenotypes(mutant):
                dpm[phenotype].add(mutant)
        return dpm


def mutants():
    """ Return all mutant objects, i.e. whatever `DictyMutants.mutants`
    returns for the shared `DictyMutants` instance
    """
    return DictyMutants.get_instance().mutants()


def genes():
    """ Return a sorted list of all distinct genes referenced in dictybase
    """
    return DictyMutants.get_instance().genes()


def phenotypes():
    """ Return a sorted list of all distinct phenotypes referenced in dictybase
    """
    return DictyMutants.get_instance().phenotypes()


def mutant_genes(mutant):
    """ Return the list of genes referenced by a mutant in dictybase

    `mutant` must be one of the objects returned by `mutants`; an unknown
    key raises ``KeyError``.
    """
    return DictyMutants.get_instance().mutant_genes(mutant)


def mutant_phenotypes(mutant):
    """ Return the list of phenotypes referenced by a mutant in dictybase

    `mutant` must be one of the objects returned by `mutants`; an unknown
    key raises ``KeyError``.
    """
    return DictyMutants.get_instance().mutant_phenotypes(mutant)


def gene_mutants():
    """ Return a dictionary {gene: set(mutant objects having that gene), ...}

    The result is a ``defaultdict(set)``, so looking up a gene that is not
    present inserts and returns an empty set instead of raising.
    """
    return DictyMutants.get_instance().gene_mutants()


def phenotype_mutants():
    """ Return a dictionary {phenotype: set(mutant objects having that phenotype), ...}

    The result is a ``defaultdict(set)``, so looking up a phenotype that is
    not present inserts and returns an empty set instead of raising.
    """
    return DictyMutants.get_instance().phenotype_mutants()


def download_mutants():
    """ Download the current dictybase mutant files and return the
    resulting mutant collection as a pickled string

    The call goes through the shared `DictyMutants` instance, so the
    previously stored pickle is loaded first if no instance exists yet.
    """
    return DictyMutants.get_instance().pickle_data()


if __name__ == "__main__":
    # Smoke test: print the phenotypes of the first mutant in the collection.
    dicty_mutants = mutants()
    # Single-argument parenthesised form prints identically under the
    # Python 2 print statement and the print() function.
    print(mutant_phenotypes(dicty_mutants[0]))
# Note: See TracBrowser for help on using the repository browser.