source: orange-bioinformatics/orangecontrib/bio/obiKEGG/entry/fields.py @ 1873:0810c5708cc5

Revision 1873:0810c5708cc5, 5.5 KB checked in by Ales Erjavec <ales.erjavec@…>, 6 months ago (diff)

Moved '_bioinformatics' into orangecontrib namespace.

RevLine 
[1532]1"""
[1734]2Wrapper classes for db entry fields to support pythonic
[1532]3interface.
[1734]4
[1532]5"""
6
[1734]7
class DBField(object):
    """
    Base class for a single field of a DBGET entry.

    The raw field contents are kept in the ``text`` attribute.
    """
    __SLOTS__ = ["text"]

    def __init__(self, text):
        self.text = text

    def _convert(self):
        """
        Return the field as a plain python value.

        For the base field this is the raw text with any trailing
        newline characters removed.
        """
        raw = self.text
        return raw.rstrip("\n")
[1734]22
23
class DBSimpleField(DBField):
    """
    A field that has a title and text but no subsections.
    """
    __SLOTS__ = ["text"]
    # TITLE must be set in subclasses or object instances
    TITLE = None

    def __str__(self):
        return self.format()

    def format(self, section_indent=12, subsection_indent=0):
        """
        Return the field rendered as text.

        The title is written after `subsection_indent` spaces and is left
        justified so that title plus padding fill `section_indent` columns;
        the (indented) field text follows immediately.
        """
        title_width = section_indent - subsection_indent
        template = "{0}%-{1}s%s".format(" " * subsection_indent, title_width)
        body = self._indent(self.text, section_indent)
        return template % (self.TITLE, body)

    def _indent(self, text, section_indent=12):
        """
        Indent every line break in `text` except the last one by
        `section_indent` spaces.
        """
        continuation = "\n" + " " * section_indent
        # Everything before the final newline gets the indent; the final
        # newline (if any) and whatever follows it are passed through as is.
        head, last_newline, tail = text.rpartition("\n")
        return head.replace("\n", continuation) + last_newline + tail
[1734]47
48
class DBEntryField(DBSimpleField):
    """
    The ENTRY field; every entry starts with this field.
    """
    TITLE = "ENTRY"
    __SLOTS__ = ["text"]
[1734]55
56
class DBNameField(DBSimpleField):
    """
    Simple field titled NAME.
    """
    TITLE = "NAME"
    __SLOTS__ = ["text"]
[1734]60
61
class DBDefinitionField(DBSimpleField):
    """
    Simple field titled DEFINITION.
    """
    TITLE = "DEFINITION"
    __SLOTS__ = ["text"]
[1734]65
66
class DBFieldWithSubsections(DBSimpleField):
    """
    A field that carries a list of subsection fields in addition to its
    own text (for instance REFERENCE in genome).
    """
    __SLOTS__ = ["text", "subsections"]
    TITLE = None
    SUBSECTIONS = None

    def __init__(self, text, subsections=None):
        self.text = text
        self.subsections = subsections or []

    def format(self, section_indent=12, subsection_indent=2):
        """
        Return this field's own formatted text followed by the formatted
        text of each subsection, concatenated without a separator.
        """
        # The field's own title is never indented; only the subsections
        # receive `subsection_indent`.
        parts = [DBSimpleField.format(self, section_indent,
                                      subsection_indent=0)]
        for sub in self.subsections:
            parts.append(sub.format(section_indent, subsection_indent))
        return "".join(parts)

    def _convert(self):
        """
        Return ``(own_value, subsections)`` where `subsections` is a list
        of ``(lower-cased subsection title, converted subsection)`` pairs.
        """
        own_value = DBSimpleField._convert(self)
        converted = []
        for sub in self.subsections:
            converted.append((sub.TITLE.lower(), sub._convert()))
        return (own_value, converted)
[1734]90
91
class DBTaxonomyField(DBFieldWithSubsections):
    """
    TAXONOMY field, with a LINEAGE subsection.
    """
    __SLOTS__ = ["text", "subsections"]
    TITLE = "TAXONOMY"
    SUBSECTIONS = ["LINEAGE"]

    @property
    def taxid(self):
        """
        The second ':'-separated component of the field's own text.
        """
        components = DBSimpleField._convert(self).split(":")
        return components[1]
[1734]100
101
class DBDataSourceField(DBSimpleField):
    """
    Simple field titled DATA_SOURCE.
    """
    TITLE = "DATA_SOURCE"
    __SLOTS__ = ["text"]
105
106
class DBReference(DBFieldWithSubsections):
    """
    REFERENCE field with AUTHORS, TITLE and JOURNAL subsections.

    The accessors below return the subsection objects by their position
    in ``subsections`` (0, 1 and 2 respectively).
    """
    __SLOTS__ = ["text", "subsections"]
    TITLE = "REFERENCE"
    SUBSECTIONS = ["AUTHORS", "TITLE", "JOURNAL"]

    @property
    def authors(self):
        """First subsection."""
        subs = self.subsections
        return subs[0]

    @property
    def title(self):
        """Second subsection."""
        subs = self.subsections
        return subs[1]

    @property
    def journal(self):
        """Third subsection."""
        subs = self.subsections
        return subs[2]
[1734]123
124
class DBDBLinks(DBSimpleField):
    """
    DBLINKS field; each line has the form ``<key>: <values>``.
    """
    __SLOTS__ = ["text"]
    TITLE = "DBLINKS"

    @property
    def links(self):
        """
        A list with one tuple per line of the raw text, each line split
        once on ``": "``.
        """
        pairs = []
        for line in self.text.splitlines():
            pairs.append(tuple(line.split(": ", 1)))
        return pairs

    def _convert(self):
        """
        Return a dict mapping each link key to the list of its
        space separated (non-empty) values.
        """
        # Some dblinks can span multiple lines but are always 'indented',
        # so a newline followed by a space marks a continuation line.
        joined = DBSimpleField._convert(self).replace("\n ", "")
        split_lines = [tuple(line.split(": ", 1))
                       for line in joined.splitlines()]
        converted = {}
        for key, values in split_lines:
            converted[key] = [v for v in values.split(" ") if v]
        return converted
[1734]141
142
class DBPathway(DBSimpleField):
    """
    PATHWAY field; converts to the first space-delimited token of
    every line.
    """
    __SLOTS__ = ["text"]
    TITLE = "PATHWAY"

    @property
    def pathways(self):
        """Same value as :func:`_convert`."""
        return self._convert()

    def _convert(self):
        """
        Return a list holding the leading token (everything before the
        first space) of each line.
        """
        leading = []
        for line in DBSimpleField._convert(self).splitlines():
            leading.append(line.partition(" ")[0])
        return leading
[1734]154
155
class DBAASeq(DBSimpleField):
    """
    AASEQ field.

    The accessors treat the first line of the text as an integer
    sequence length and the remaining lines as the sequence itself.
    """
    __SLOTS__ = ["text"]
    TITLE = "AASEQ"

    @property
    def sequence(self):
        """
        The sequence (everything after the first line) as a single
        string with all newlines removed.

        Raises IndexError if the text contains no newline.
        """
        # Split the field *text*; the field object itself has no `split`
        # method, so calling it on `self` raised AttributeError.
        return self.text.split("\n", 1)[1].replace("\n", "")

    @property
    def sequence_lenght(self):
        """
        The first line of the text converted to int.

        The misspelled name is kept for backward compatibility; see
        `sequence_length` for the correctly spelled alias.
        """
        return int(self.text.split("\n", 1)[0])

    # Correctly spelled alias of `sequence_lenght`.
    sequence_length = sequence_lenght

    def _convert(self):
        """
        Return the sequence as a single string with newlines removed.

        Raises ValueError if the text contains no newline separating the
        first line from the sequence.
        """
        text = DBSimpleField._convert(self)
        # The first line is not needed here.
        _, seq = text.split("\n", 1)
        return seq.replace("\n", "")
[1734]172
173
class DBNTSeq(DBSimpleField):
    """
    NTSEQ field.

    The accessors treat the first line of the text as an integer
    sequence length and the remaining lines as the sequence itself.
    """
    __SLOTS__ = ["text"]
    TITLE = "NTSEQ"

    @property
    def sequence(self):
        """
        The sequence (everything after the first line) as a single
        string with all newlines removed.

        Raises IndexError if the text contains no newline.
        """
        # Split the field *text*; the field object itself has no `split`
        # method, so calling it on `self` raised AttributeError.
        return self.text.split("\n", 1)[1].replace("\n", "")

    @property
    def sequence_lenght(self):
        """
        The first line of the text converted to int.

        The misspelled name is kept for backward compatibility; see
        `sequence_length` for the correctly spelled alias.
        """
        return int(self.text.split("\n", 1)[0])

    # Correctly spelled alias of `sequence_lenght`.
    sequence_length = sequence_lenght

    def _convert(self):
        """
        Return the sequence as a single string with newlines removed.

        Raises ValueError if the text contains no newline separating the
        first line from the sequence.
        """
        text = DBSimpleField._convert(self)
        # The first line is not needed here.
        _, seq = text.split("\n", 1)
        return seq.replace("\n", "")
[1734]190
191
class DBPathwayMapField(DBSimpleField):
    """
    PATHWAY_MAP field; the text starts with the pathway id.
    """
    __SLOTS__ = ["text"]
    TITLE = "PATHWAY_MAP"

    def kgml_url(self):
        """
        Return the KGML download url built from `pathway_id`.
        """
        template = "http://www.genome.jp/kegg-bin/download?entry={0}&format=kgml"
        return template.format(self.pathway_id)

    @property
    def pathway_id(self):
        """
        The part of the text before the first space.
        """
        identifier, _, _ = self.text.partition(" ")
        return identifier
[1734]202
203
class DBGeneField(DBSimpleField):
    """
    GENE field; converts to the first space-delimited token of every line.
    """
    __SLOTS__ = ["text"]
    TITLE = "GENE"

    def _convert(self):
        """
        Return a list holding the leading token (everything before the
        first space) of each line.
        """
        leading = []
        for line in DBSimpleField._convert(self).splitlines():
            token, _, _ = line.partition(" ")
            leading.append(token)
        return leading
[1734]212
213
class DBEnzymeField(DBSimpleField):
    """
    ENZYME field; converts to the list of its lines.
    """
    __SLOTS__ = ["text"]
    TITLE = "ENZYME"

    def _convert(self):
        """
        Return the field text (without trailing newlines) split into lines.
        """
        return DBSimpleField._convert(self).splitlines()
[1734]222
223
class DBCompoundField(DBSimpleField):
    """
    COMPOUND field; converts to the first space-delimited token of
    every line.
    """
    __SLOTS__ = ["text"]
    TITLE = "COMPOUND"

    def _convert(self):
        """
        Return a list holding the leading token (everything before the
        first space) of each line.
        """
        stripped = DBSimpleField._convert(self)
        return [row.partition(" ")[0] for row in stripped.splitlines()]
Note: See TracBrowser for help on using the repository browser.