Changeset 718:ec997c138f9e in orange-bioinformatics
- Timestamp:
- 03/23/09 10:44:10 (4 years ago)
- Branch:
- default
- Convert:
- 6f8f5db40ff0d4754bbcf38aa8ed655ffe881d7a
- File:
-
- 1 edited
-
obiGene.py (modified) (7 diffs)
Legend:
- Unmodified
- Added
- Removed
-
obiGene.py
r717 r718 10 10 import obiTaxonomy 11 11 import orngServerFiles 12 from obiTaxonomy import TextDB13 from weakref import WeakValueDictionary14 12 15 13 default_database_path = orngServerFiles.localpath("NCBI_geneinfo") 16 14 17 15 class GeneInfo(object): 18 __slots__ = ("tax_id", "gene_id", "symbol", "locus_tag", "synonyms", 19 "dbXrefs", "chromosome", "map_location", "description", "type", 20 "symbol_from_nomenclature_authority", "full_name_from_nomenclature_authority", 21 "nomenclature_status", "other_designations", "modification_date") 16 """ An object representing the NCBI information for a gene. 17 """ 18 __slots__ = ncbi_geneinfo_tags 22 19 def __init__(self, line): 20 """ Construct the GeneInfo object from a line in the NCBI gene_info file 21 """ 23 22 line = line.split("\t") 24 ## line = line[:1] + line[2:]25 23 for attr, value in zip(self.__slots__, line): 26 24 if value == "-": … … 44 42 45 43 class NCBIGeneInfo(dict): 46 ## _object_cache = WeakValueDictionary()47 44 _object_cache = {} 48 45 def __init__(self, *args, **kwargs): 49 """ An object for accessing NCBI gene info 46 """ An dictionary like object for accessing NCBI gene info 47 Arguments:: 48 - *organsim* Organism id 49 50 Example:: 51 >>> info = NCBIGeneInfo("Homo sapiens") 50 52 """ 51 53 if args and type(args[0]) in [str, unicode]: … … 58 60 elif len(taxids) > 1: 59 61 raise obiTaxonomy.MultipleSpeciesException, ", ".join(["%s: %s" % (id, obiTaxonomy.name(id)) for id in taxids]) 60 ## self.taxid = args[0] 62 61 63 self.taxid = taxids.pop() 62 64 if not os.path.exists(orngServerFiles.localpath("NCBI_geneinfo", "gene_info.%s.db" % self.taxid)): … … 64 66 file = open(orngServerFiles.localpath("NCBI_geneinfo", "gene_info.%s.db" % self.taxid), "rb") 65 67 self.update(dict((line.split("\t", 3)[1], line) for line in file.read().split("\n") if line.strip() and not line.startswith("#"))) 66 ## if self.taxid not in self._object_cache: 67 ## if not os.path.exists(orngServerFiles.localpath("NCBI_geneinfo", "gene_info.%s.db" % self.taxid)): 68 ## orngServerFiles.download("NCBI_geneinfo", "gene_info.%s.db" % self.taxid) 69 ## self._object_cache[self.taxid] = self.load(orngServerFiles.localpath("NCBI_geneinfo", "gene_info.%s.db" % self.taxid)) 70 ## 71 ## self.__dict__ = self._object_cache[self.taxid].__dict__ 68 72 69 else: 73 70 dict.__init__(self, *args, **kwargs) … … 76 73 @classmethod 77 74 def load(cls, file): 75 """ A class method that loads gene info from file 76 """ 78 77 if type(file) in [str, unicode]: 79 78 file = open(file, "rb") 80 ## self._data = dict([(line.split("\t", 3)[1], line) for line in file.read().split("\n") if line])81 ## self.update(dict([(line.split("\t", 3)[1], line) for line in file.read().split("\n") if line]))82 79 return cls((line.split("\t", 3)[1], line) for line in file.read().split("\n") if line.strip() and not line.startswith("#")) 83 80 84 def get_info(self, id): 85 ## return GeneInfo(self._data[id]) 86 return self[id] 81 def get_info(self, gene_id): 82 """ Search and return the GeneInfo object for gene_id 83 """ 84 return self[gene_id] 87 85 88 86 def __call__(self, name): 87 """ Search and return the GeneInfo object for gene_id 88 """ 89 89 translate = lambda a:a 90 90 id = translate(name) … … 125 125 @staticmethod 126 126 def get_geneinfo_from_ncbi(progressCallback=None): 127 ## if type(file) in [unicode, str]:128 ## file = open(file, "wb")129 127 import urllib2, gzip 130 128 from cStringIO import StringIO … … 133 131 return info 134 132 135 def prepare_gene_info(file):136 info = NCBIGeneInfo.load(file)137 taxids = ["9606"]#obiTaxonomy.common_organisms()138 genes = dict([(taxid, []) for taxid in taxids])139 for gi in info.itervalues():140 if gi.tax_id in taxids:141 genes[gi.tax_id].append(gi)142 143 for taxid, genes in genes.items():144 f = open("gene_info.%s.db" % taxid, "wb")145 f.write("\n".join([str(gi) for gi in sorted(genes, key=lambda gi:int(gi.gene_id))]))146 147 133 148 134 if __name__ == "__main__": 149 ## info = NCBIGeneInfo("9606") 150 ## gi = info("1") 151 ## print type(gi), type(info.get("1")) 152 ## print type(info.values()[0]) 153 prepare_gene_info("D:/Download/gene_info/Homo_sapiens.gene_info") 154 ## print gi.tax_id, gi.synonyms, gi.dbXrefs, gi.symbol_from_nomenclature_authority, gi.full_name_from_nomenclature_authority 135 info = NCBIGeneInfo("9606") 136 gi = info(list(info)[0]) 137 print gi.tax_id, gi.synonyms, gi.dbXrefs, gi.symbol_from_nomenclature_authority, gi.full_name_from_nomenclature_authority 155 138
Note: See TracChangeset
for help on using the changeset viewer.
