source: orange-bioinformatics/_bioinformatics/obiDictyMutants.py @ 1765:b497b961c944

Revision 1765:b497b961c944, 3.8 KB checked in by Flashpoint <vid.flashpoint@…>, 12 months ago (diff)

Added obiDictyMutants.py

Line 
1import os
2import urllib2
3import shutil
4
5from collections import defaultdict
6
7from Orange.orng import orngServerFiles
8
class DictyMutant(object):
    """ A single mutant record parsed from one tab-separated text line.

    The line is expected to hold four columns: identifier, descriptor,
    genes and phenotypes.

    Attributes:
        id: the first column, unchanged.
        descriptor: set of the "/"-separated parts of the second column.
        genes: list of the " | "-separated entries of the third column.
        phenotypes: list of the " | "-separated entries of the fourth column.
    """

    # Number of tab-separated columns a complete record has.
    _COLUMNS = 4

    def __init__(self, mutant_line):
        """ Parse `mutant_line` into the record attributes.

        A trailing line terminator is ignored, missing trailing columns are
        treated as empty, and empty genes/phenotypes columns give empty
        lists (not a list holding one empty string).
        """
        # Strip only the line terminator: stripping all whitespace would also
        # remove trailing tabs, which mark empty trailing columns.
        mutant = mutant_line.rstrip("\r\n").split("\t")
        # Pad short rows so a truncated line does not raise IndexError.
        mutant += [""] * (self._COLUMNS - len(mutant))

        self.id = mutant[0]
        self.descriptor = set(mutant[1].split("/"))
        self.genes = self._split_entries(mutant[2])
        self.phenotypes = self._split_entries(mutant[3])

    @staticmethod
    def _split_entries(column):
        """ Split a " | "-separated column, dropping blank entries. """
        return [entry for entry in column.split(" | ") if entry.strip()]
17
18
class DictyMutants(object):
    """ Access to a locally stored copy of the dictyBase mutant table.

    The table is kept in a file named "DictyMutants" inside
    `local_database_path`; it is downloaded from dictybase.org when the
    file is not present yet. After loading, the records are available as
    `DictyMutant` objects keyed by their id.
    """
    VERSION = 1
    DEFAULT_DATABASE_PATH = orngServerFiles.localpath("DictyMutants") #use a default folder for storing the mutant table

    def __init__(self, local_database_path=None):
        """ Load the mutant table, downloading it first if it is missing.

        `local_database_path` is the folder holding the table; when None,
        `DEFAULT_DATABASE_PATH` is used.
        """
        self.local_database_path = local_database_path if local_database_path is not None else self.DEFAULT_DATABASE_PATH

        filename = os.path.join(self.local_database_path, "DictyMutants")
        # Test for the data file itself, not only its folder: the folder may
        # exist without the file (e.g. after an interrupted download).
        if not os.path.exists(filename):
            self.download_from_dictybase(self.local_database_path)

        self.load(filename)

    @classmethod
    def download_from_dictybase(cls, local_database_path=None):
        """ Download the mutant table into `local_database_path`.

        The data is first written to a temporary file in the same folder and
        only replaces the existing table when its contents differ.

        Returns True if the local table was created or replaced, False if
        the downloaded data is identical to the table already stored.
        """
        # The class attribute is assigned as in the original implementation,
        # in case external code reads it.
        cls.local_database_path = local_database_path if local_database_path is not None else cls.DEFAULT_DATABASE_PATH
        database_path = cls.local_database_path

        if not os.path.exists(database_path):
            os.makedirs(database_path)

        filename = os.path.join(database_path, "DictyMutants")
        temp_file = os.path.join(database_path, "DictyMutantsTemp")
        stream = urllib2.urlopen("http://dictybase.org/db/cgi-bin/dictyBase/download/download.pl?area=mutant_phenotypes&ID=all-mutants.txt")
        try:
            with open(temp_file, "wb") as output:
                shutil.copyfileobj(stream, output)
        finally:
            stream.close()

        if os.path.exists(filename):
            with open(filename, "rb") as current_file:
                current = current_file.read()
            with open(temp_file, "rb") as downloaded_file:
                temporary = downloaded_file.read()
            if current == temporary:
                os.remove(temp_file)
                return False
            # os.rename refuses to overwrite an existing file on Windows,
            # so remove the outdated table first.
            os.remove(filename)

        os.rename(temp_file, filename)
        return True

    @classmethod
    def get_instance(cls):
        """ Return an instance sharing its state with all other instances
        returned by this method; the table is loaded only on the first call.
        """
        if not hasattr(cls, "_shared_dict"):
            dicty = DictyMutants()
            cls._shared_dict = dicty.__dict__
        instance = DictyMutants.__new__(DictyMutants)
        instance.__dict__ = cls._shared_dict
        return instance

    def load(self, filename):
        """ Read the table from `filename` into memory.

        The first line is treated as a header and skipped; every other
        non-blank line is parsed into a `DictyMutant` stored under its id.
        """
        with open(filename, "rb") as table:
            table.readline()  # skip the header line
            lines = table.read().splitlines()

        dicty_mutants = {}
        for line in lines:
            if line.strip():
                mutant = DictyMutant(line)
                dicty_mutants[mutant.id] = mutant
        self._dicty_mutants = dicty_mutants

    def mutants(self):
        """ Return a list of all loaded `DictyMutant` objects. """
        return list(self._dicty_mutants.values())

    def genes(self):
        """ Return a sorted list of the distinct genes of all mutants. """
        all_genes = set()
        for mutant in self.mutants():
            all_genes.update(self.mutant_genes(mutant))
        return sorted(all_genes)

    def mutant_genes(self, mutant):
        """ Return the list of genes of `mutant`.

        `mutant` may be a `DictyMutant` object or a mutant id. Raises
        KeyError when no mutant with that id has been loaded.
        """
        mutant_id = mutant.id if isinstance(mutant, DictyMutant) else mutant
        return self._dicty_mutants[mutant_id].genes

    def gene_mutants(self):
        """ Return a dictionary {gene: set(mutant objects with that gene)}. """
        d = defaultdict(set)
        for mutant in self.mutants():
            for gene in self.mutant_genes(mutant):
                d[gene].add(mutant)
        return d
88
def mutants():
    """ Return all mutant objects held by the shared DictyMutants instance
    """
    shared = DictyMutants.get_instance()
    return shared.mutants()
93
def genes():
    """ Return the sorted, distinct genes referenced in dictybase, as
    computed by DictyMutants.genes() on the shared instance
    """
    shared = DictyMutants.get_instance()
    return shared.genes()
98
def mutant_genes(mutant):
    """ Return the genes referenced by `mutant` in dictybase, as reported by
    DictyMutants.mutant_genes() on the shared instance
    """
    shared = DictyMutants.get_instance()
    return shared.mutant_genes(mutant)
103
def gene_mutants():
    """ Return a dictionary mapping each gene to the set of mutant objects
    that reference it: {gene: set(mutant_objects for mutant), ...}
    """
    shared = DictyMutants.get_instance()
    return shared.gene_mutants()
108
if __name__ == "__main__":
    # Test whether the file contains only unique entries: prints the number
    # of distinct ids followed by the total number of ids, which are equal
    # when every entry is unique.
    entries = [entry.id for entry in mutants()]
    print("%d %d" % (len(set(entries)), len(entries)))
    #print(mutants())
    #print(genes())
Note: See TracBrowser for help on using the repository browser.