Changeset 1351:c9470ca4ca88 in orange-bioinformatics for obiGO.py


Ignore:
Timestamp:
03/23/11 14:39:25 (3 years ago)
Author:
ales_erjavec <ales.erjavec@…>
Branch:
default
Convert:
3a0d3f2492d79d12e09abc29286d3ca2695d21eb
Message:

Use the intern function to reduce the memory consumption of loaded annotation terms.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • obiGO.py

    r1334 r1351  
    159159    >>> OBOObject(r"[Term]\nid: FOO:001\nname: bar", ontology) 
    160160    """ 
     161    _INTERN_TAGS = ["id", "name", "namespace", "alt_id", "is_a"] 
    161162    def __init__(self, stanza=None, ontology=None): 
    162163        self.ontology = ontology 
     
    169170 
    170171    def ParseStanza(self, stanza): 
     172        intern_tags = set(self._INTERN_TAGS) 
    171173        for line in stanza.split("\n"): 
    172174            if ":" not in line: 
     
    181183            else: 
    182184                value = rest 
     185            tag = intern(tag) 
    183186            value = value.strip() 
     187            comment = comment.strip() 
     188            if tag in intern_tags: 
     189                value, comment = intern(value), intern(comment) 
    184190            self._lines.append((tag, value, modifiers, comment)) 
    185191            if tag in multipleTagSet: 
     
    198204        """ 
    199205        ##TODO: add other defined Typedef ids 
    200         result = [(typeId, id) for typeId in ["is_a"] for id in self.values.get(typeId, [])]  
    201         result = result + [tuple(r.split(None, 1)) for r in self.values.get("relationship", [])] 
     206        typeIds = [intern("is_a")] 
     207        result = [(typeId, id) for typeId in typeIds for id in self.values.get(typeId, [])]  
     208        result = result + [tuple(map(intern, r.split(None, 1))) for r in self.values.get("relationship", [])] 
    202209        return result 
    203210 
     
    435442    data members for quicker access: geneName, GOId, evidence, aspect and 
    436443    alias(a list of aliases) 
     444     
    437445    """ 
    438446    __slots__ = annotationFields + ["geneName", "GOId", "evidence", 
    439447                                    "aspect", "alias", "additionalAliases"] 
    440448    def __init__(self, fullText): 
     449        """\ 
     450        :param fullText: A single line from the annotation file. 
     451         
     452        """ 
    441453        for slot, val in zip(annotationFields, fullText.split("\t")): 
    442             setattr(self, slot, val) 
     454            setattr(self, slot, intern(val)) 
    443455        self.geneName = self.DB_Object_Symbol 
    444456        self.GOId = self.GO_ID 
    445457        self.evidence = self.Evidence_Code 
    446458        self.aspect = self.Aspect 
    447         self.alias = self.DB_Object_Synonym.split("|") 
     459        self.alias = list(map(intern, self.DB_Object_Synonym.split("|"))) 
    448460 
    449461        self.additionalAliases = [] 
    450462        if ":" in self.DB_Object_Name: 
    451             self.additionalAliases = _re_obj_name_.findall(self.DB_Object_Name.split(":")[0]) 
     463            self.additionalAliases = [] #_re_obj_name_.findall(self.DB_Object_Name.split(":")[0]) 
    452464 
    453465    def __getattr__(self, name): 
Note: See TracChangeset for help on using the changeset viewer.