source: orange-bioinformatics/orangecontrib/bio/obiArrayExpress.py @ 2015:2370b2755a4e

Revision 2015:2370b2755a4e, 16.1 KB checked in by Ales Erjavec <ales.erjavec@…>, 6 weeks ago (diff)

Moved deprecated GeneExpressionAtlas code back to deprecated obiArrayExpress.

Line 
1from __future__ import absolute_import
2
3from .arrayexpress import *
4
5import warnings
6
7"""
8Gene Expression Atlas
9---------------------
10
11.. WARNING:: Deprecated, use ``obiGeneAtlas`` instead.
12
13`Gene Expression Atlas <http://www.ebi.ac.uk/gxa/>`_ is a curated subset of
14gene expression experiments in Array Express Archive.
15
16Use `query_atlas_simple` for simple queries.
17
18Example (query human genes for experiments in which they are up regulated) ::
19
20    >>> obiArrayExpress.query_atlas_simple(genes=["SORL1", "PSIP1", "CDKN1C"], regulation="up", organism="Homo sapiens")
21    {u'...
22
23Or use the `AtlasCondition` subclasses in this module to construct a more
24advanced query and use the `query_atlas` function.
25
26Example (query human genes annotated to the GO term 'transporter activity'
27that are up regulated in the liver in at least three experiments) ::
28
29    >>> go_cond = AtlasConditionGeneProperty("Goterm", "Is", "transporter activity")
30    >>> liver_cond = AtlasConditionExperimentalFactor("Organism_part", "up", 3, "liver")
31    >>> org_cond = AtlasConditionOrganism("Homo sapiens")
32    >>> cond_list = AtlasConditionList([go_cond, liver_cond, org_cond])
33    >>> query_atlas(cond_list)
34    {u'...
35
36"""
37
38
39def _open_shelve(filename, flag="r"):
40    dirname = os.path.dirname(filename)
41    if not os.path.isdir(dirname):
42        os.makedirs(dirname)
43    exists = os.path.exists(filename)
44    if flag in ["r", "w"] and not exists:
45        # needs to be created first
46        # XXX: Race condition
47        s = shelve.open(filename, "c")
48        s.close()
49
50    return shelve.open(filename, flag)
51
52
53@contextmanager
54def _fake_closing(obj):
55    yield obj
56
57
class GeneExpressionAtlasConenction(object):

    """
    A connection to Gene Expression Atlas database.

    :param address:
        Address of the GXA server (default: `DEFAULT_ADDRESS`).
    :param timeout:
        Socket timeout in seconds (default 30) used for every request.
        If None, the timeout is left at the library default.
    :param cache:
        A dict like object to use as a cache, or the file name of a
        shelve (default: `DEFAULT_CACHE`).

    """
    # NOTE(review): this is the 'www-test' host, whereas the original
    # documentation mentioned http://www.ebi.ac.uk/gxa/api/deprecated;
    # confirm which one is intended.
    DEFAULT_ADDRESS = "http://www-test.ebi.ac.uk/gxa/api/deprecated"

    DEFAULT_CACHE = serverfiles.localpath(
        "ArrayExpress", "GeneAtlasCache.shelve")

    def __init__(self, address=None, timeout=30, cache=None):
        """
        Initialize the connection.

        """
        self.address = address if address is not None else self.DEFAULT_ADDRESS
        self.timeout = timeout
        if cache is None:
            cache = self.DEFAULT_CACHE

        self.cache = cache

    def _urlopen(self, url):
        """
        Open `url`, honouring the connection's `timeout`.
        """
        if self.timeout is None:
            return urllib2.urlopen(url)
        return urllib2.urlopen(url, timeout=self.timeout)

    def query(self, condition, format="json", start=None, rows=None, indent=False):
        """
        Run a query and return a file like object with the raw response.

        :param condition:
            Object whose `rest()` method returns the REST query string.
        :param format:
            Value of the `format` request parameter (default "json").
        :param start:
            Value of the `start` request parameter. Only sent when both
            `start` and `rows` are given.
        :param rows:
            Value of the `rows` request parameter. Only sent when both
            `start` and `rows` are given.
        :param indent:
            If true, append the `indent` request parameter.

        """
        url = self.address + "?" + condition.rest()
        if start is not None and rows is not None:
            url += "&start={0}&rows={1}".format(start, rows)
        url += "&format={0}".format(format)
        if indent:
            url += "&indent"

        if self.cache is not None:
            return self._query_cached(url)
        else:
            return self._urlopen(url)

    def _query_cached(self, url):
        """
        Return the response for `url`, served from the cache when present.

        On a cache miss the response is downloaded, stored under `url`
        and returned wrapped in a `StringIO`.
        """
        if self.cache is None:
            return self._urlopen(url)

        with self.open_cache("r") as cache:
            if url in cache:
                return StringIO(cache[url])

        # Close the HTTP response as soon as its body has been read.
        with closing(self._urlopen(url)) as response:
            contents = response.read()

        with self.open_cache("w") as cache:
            cache[url] = contents

        return StringIO(contents)

    def open_cache(self, flag="r"):
        """
        Return a context manager for a dict like object.

        If `self.cache` is a file name, the shelve at that path is opened
        with `flag`; should that fail, an empty throwaway dict is used
        instead. Any other `self.cache` object is used as is.
        """
        if isinstance(self.cache, basestring):
            try:
                return closing(_open_shelve(self.cache, flag))
            except Exception:
                # Deliberate best effort: an unusable cache file must not
                # make the query itself fail.
                return _fake_closing({})
        else:
            return _fake_closing(self.cache)
127
128
# Names of all Gene Property filters (each entry is followed by its
# human readable label).
GENE_FILTERS = [
    "Name",  # Gene name
    "Goterm",  # Gene Ontology Term
    "Interproterm",  # InterPro Term
    "Disease",  # Gene-Disease Association
    "Keyword",  # Gene Keyword
    "Protein",  # Protein

    "Dbxref",  # Other Database Cross-Refs
    "Embl",  # EMBL-Bank ID
    "Ensfamily",  # Ensembl Family
    "Ensgene",  # Ensembl Gene ID

    "Ensprotein",  # Ensembl Protein ID
    "Enstranscript",  # Ensembl Transcript ID
    "Goid",  # Gene Ontology ID
    "Image",  # IMAGE ID
    "Interproid",  # InterPro ID
    "Locuslink",  # Entrez Gene ID

    "Omimid",  # OMIM ID
    "Orf",  # ORF
    "Refseq",  # RefSeq ID
    "Unigene",  # UniGene ID
    "Uniprot",  # UniProt Accession

    "Hmdb",  # HMDB ID
    "Chebi",  # ChEBI ID
    "Cas",  # CAS
    "Uniprotmetenz",  # Uniprotmetenz
    "Gene",  # Gene Name or Identifier
    "Synonym",  # Gene Synonym
]
163
# Qualifiers accepted by a Gene Property filter
GENE_FILTER_QUALIFIERS = ["Is", "IsNot"]
169
# Organisms available in the Atlas ("Unknown" is intentionally left out)
ATLAS_ORGANISMS = [
    "Anopheles gambiae",
    "Arabidopsis thaliana",
    "Bos taurus",
    "Caenorhabditis elegans",
    "Danio rerio",
    "Drosophila melanogaster",
    "Epstein barr virus",
    "Gallus gallus",
    "Homo sapiens",
    "Human cytomegalovirus",
    "Kaposi sarcoma-associated herpesvirus",
    "Mus musculus",
    "Rattus norvegicus",
    "Saccharomyces cerevisiae",
    "Schizosaccharomyces pombe",
    # "Unknown",
    "Xenopus laevis",
]
190
191#_COMMON_TAXIDS = \
192#    {"Anopheles gambiae",
193#     "Arabidopsis thaliana",
194#     "Bos taurus",
195#     "Caenorhabditis elegans",
196#     "Danio rerio",
197#     "Drosophila melanogaster",
198#     "Epstein barr virus",
199#     "Gallus gallus",
200#     "Homo sapiens",
201#     "Human cytomegalovirus",
202#     "Kaposi sarcoma-associated herpesvirus",
203#     "Mus musculus",
204#     "Rattus norvegicus",
205#     "Saccharomyces cerevisiae",
206#     "Schizosaccharomyces pombe",
207# "Unknown",
208#     "Xenopus laevis"
209#     }
210
211
def ef_ontology():
    """ Return the `EF <http://www.ebi.ac.uk/efo/>`_ (Experimental Factor)
    ontology as an `obiOntology.OBOOntology`.

    The ``efo.obo`` file is taken from the "ArrayExpress" server files
    domain; if fetching it raises an HTTP error, the file is read from
    the EFO source repository instead.
    """
    from . import obiOntology
    # Should this be in the OBOFoundry (Ontology) domain
    try:
        obo_path = serverfiles.localpath_download("ArrayExpress", "efo.obo")
        obo_stream = open(obo_path, "rb")
    except urllib2.HTTPError:
        obo_stream = urllib2.urlopen("http://efo.svn.sourceforge.net/svnroot/efo/trunk/src/efoinobo/efo.obo")
    return obiOntology.OBOOntology(obo_stream)
223
224
class AtlasCondition(object):

    """ Abstract base for Gene Expression Atlas query conditions.

    Both methods raise NotImplementedError here and are meant to be
    overridden by subclasses.
    """

    def validate(self):
        """ Check the condition's parameters (implemented by subclasses).
        """
        raise NotImplementedError()

    def rest(self):
        """ Build the REST query fragment (implemented by subclasses).
        """
        raise NotImplementedError()
239
240
class AtlasConditionList(list, AtlasCondition):

    """ A list of AtlasCondition instances which is itself a condition.
    """

    def validate(self):
        """ Validate every contained condition, in list order.
        """
        for condition in self:
            condition.validate()

    def rest(self):
        """ Return the REST fragments of all contained conditions joined
        with '&'.
        """
        fragments = [condition.rest() for condition in self]
        return "&".join(fragments)
252
253
class AtlasConditionGeneProperty(AtlasCondition):

    """ An atlas gene filter condition.

    :param property: Property of the gene. If None or "" all properties
        will be searched.
    :param qualifier: Qualifier can be 'Is' or 'IsNot'
    :param value: The value to search for; either a single string or a
        list/tuple of strings. Spaces are encoded as '+' and multiple
        values are joined with '+'.

    Raises ValueError if `value` is neither a string nor a list/tuple, and
    AssertionError if `property` or `qualifier` is not a known name.

    Example ::

        >>> # Condition on a gene name
        >>> condition = AtlasConditionGeneProperty("Name", "Is", "AS3MT")
        >>> # Condition on genes from a GO Term
        >>> condition = AtlasConditionGeneProperty("Goterm", "Is", "p53 binding")
        >>> # Condition on disease association
        >>> condition = AtlasConditionGeneProperty("Disease", "Is", "cancer")

    """

    def __init__(self, property, qualifier, value):
        self.property = property or ""
        self.qualifier = qualifier
        if isinstance(value, basestring):
            terms = [value]
        elif isinstance(value, (list, tuple)):
            terms = value
        else:
            raise ValueError(value)

        # Encode spaces inside every term, not only for the single string
        # case; otherwise a list item such as "p53 binding" would leave a
        # raw space in the query URL.
        self.value = "+".join(term.replace(" ", "+") for term in terms)

        self.validate()

    def validate(self):
        """ Assert that the property and qualifier are known names.
        """
        assert self.property in GENE_FILTERS + [""], \
            "Unknown gene property %r" % (self.property,)
        assert self.qualifier in GENE_FILTER_QUALIFIERS + [""], \
            "Unknown gene qualifier %r" % (self.qualifier,)

    def rest(self):
        """ Return the 'gene<property><qualifier>=<value>' query fragment.
        """
        return "gene{property}{qualifier}={value}".format(**self.__dict__)
292
293
class AtlasConditionExperimentalFactor(AtlasCondition):

    """ An atlas experimental factor filter condition.

    :param factor: EFO experimental factor
    :param regulation: "up", "down", "updown", "any" or "none"
    :param n: Minimum number of experiments with this condition
    :param value: Experimental factor value

    Raises AssertionError if `regulation` is not one of `REGULATIONS`.

    Example ::

        >>> # Any genes up regulated in at least 3 experiments involving cancer.
        >>> condition = AtlasConditionExperimentalFactor("", "up", 3, "cancer")
        >>> # Only genes which are up/down regulated in the heart in at least one experiment.
        >>> condition = AtlasConditionExperimentalFactor("Organism_part", "updown", 1, "heart")

    """

    # All regulation values named in the documentation. "any" is also what
    # `query_atlas_simple` passes when no regulation is requested.
    # NOTE(review): confirm the server accepts the "any"/"none" prefixes.
    REGULATIONS = ["up", "down", "updown", "any", "none"]

    def __init__(self, factor, regulation, n, value):
        self.factor = factor
        self.regulation = regulation
        self.n = n
        self.value = value
        self.validate()

    def validate(self):
        """ Assert that the regulation is one of the supported values.
        """
        # TODO: validate the factor and value
#        assert(self.factor in ef_ontology())
        assert self.regulation in self.REGULATIONS, \
            "Unknown regulation %r" % (self.regulation,)

    def rest(self):
        """ Return the '<regulation><n>In<factor>=<value>' query fragment.
        """
        value = self.value
        if isinstance(value, basestring):
            # Same space encoding as the gene and experiment conditions.
            value = value.replace(" ", "+")
        return "{regulation}{n}In{factor}={value}".format(
            regulation=self.regulation, n=self.n, factor=self.factor,
            value=value)
326
327
class AtlasConditionOrganism(AtlasCondition):

    """ Condition on organism.

    :param organism: Organism name; must be one of `ATLAS_ORGANISMS`.
    """

    def __init__(self, organism):
        self.organism = organism
        self.validate()

    def validate(self):
        """ Assert that the organism is one of the Atlas organisms.
        """
        assert self.organism in ATLAS_ORGANISMS

    def rest(self):
        """ Return the 'species=<name>' fragment, with the name lower
        cased and its spaces replaced by '+'.
        """
        species = self.organism.lower().replace(" ", "+")
        return "species={0}".format(species)
342
343
class AtlasConditionExperiment(AtlasCondition):

    """ Condition on experiment

    :param property: Property of the experiment. If None or "" all properties
        will be searched.
    :param qualifier: Qualifier can be 'Has' or 'HasNot'
    :param value: The value to search for; either a single string or a
        list/tuple of strings. Spaces are encoded as '+' and multiple
        values are joined with '+'.

    Raises ValueError if `value` is neither a string nor a list/tuple, and
    AssertionError if `qualifier` is not a known name.

    Example ::

        >>> # Condition on an experiment accession
        >>> condition = AtlasConditionExperiment("", "", "E-GEOD-24283")
        >>> # Condition on experiments involving lung
        >>> condition = AtlasConditionExperiment("Organism_part", "Has", "lung")

    """
#    EXPERIMENT_FILTERS = [
#                "Organism"
#                "Factor"]

    EXPERIMENT_FILTER_QUALIFIERS = [
        "Has",
        "HasNot"]

    def __init__(self, property, qualifier, value):
        # None is documented as "search all properties"; without the
        # fallback it would be rendered literally as "None" in the query.
        self.property = property or ""
        self.qualifier = qualifier
        if isinstance(value, basestring):
            terms = [value]
        elif isinstance(value, (list, tuple)):
            terms = value
        else:
            raise ValueError(value)

        # Encode spaces inside every term, not only for the single string
        # case, so list items cannot leave a raw space in the query URL.
        self.value = "+".join(term.replace(" ", "+") for term in terms)

        self.validate()

    def validate(self):
        """ Assert that the qualifier is a known name.
        """
        # TODO: check to EFO factors
#        assert(self.property in EXPERIMENT_FILTERS + [""])
        assert self.qualifier in self.EXPERIMENT_FILTER_QUALIFIERS + [""], \
            "Unknown experiment qualifier %r" % (self.qualifier,)

    def rest(self):
        """ Return the 'experiment<property><qualifier>=<value>' fragment.
        """
        return "experiment{property}{qualifier}={value}".format(**self.__dict__)
388
389
class GeneAtlasError(ValueError):

    """ Raised when the Atlas server answers with an error response.
    """
395
396
397def __check_atlas_error_json(response):
398    if "error" in response:
399        raise GeneAtlasError(response["error"])
400    return response
401
402
403def __check_atlas_error_xml(response):
404    error = response.find("error")
405    if error is not None:
406        raise GeneAtlasError(error.text)
407    return response
408
409
def query_atlas_simple(genes=None, regulation=None, organism=None,
                       condition=None, format="json", start=None,
                       rows=None):
    """ A simple Atlas query.

    :param genes: A list of gene names to search for.
    :param regulation: Search for experiments in which `genes` are "up",
        "down", "updown" or "none" regulated. If None all experiments
        are searched.
    :param organism: Search experiments for organism. If None all experiments
        are searched.
    :param condition: An EFO factor value (e.g. "brain")
    :param format: Response format; "json" is parsed with `parse_json`,
        anything else with `parse_xml`.
    :param start: Passed to the connection's `query` as `start`.
    :param rows: Passed to the connection's `query` as `rows`.

    Example ::

        >>> query_atlas_simple(genes=['Pou5f1', 'Dppa3'], organism="Mus musculus")
        {u'...

        >>> query_atlas_simple(genes=['Pou5f1', 'Dppa3'], regulation="up", organism="Mus musculus")
        {u'...

        >>> query_atlas_simple(genes=['Pou5f1', 'Dppa3'], regulation="up", condition="liver", organism="Mus musculus")
        {u'...

    """
    # stacklevel=2 attributes the warning to the caller of this function.
    warnings.warn("Use 'obiGeneAtlas.run_simple_query' instead.",
                  DeprecationWarning, stacklevel=2)
    conditions = AtlasConditionList()
    if genes:
        conditions.append(AtlasConditionGeneProperty("Gene", "Is", genes))
    if regulation or condition:
        # Fill in the half that was not given.
        regulation = "any" if regulation is None else regulation
        condition = "" if condition is None else condition
        conditions.append(AtlasConditionExperimentalFactor("", regulation, 1, condition))
    if organism:
        conditions.append(AtlasConditionOrganism(organism))

    connection = GeneExpressionAtlasConenction()
    results = connection.query(conditions, format=format, start=start,
                               rows=rows)
    if format == "json":
        return parse_json(results)
    else:
        return parse_xml(results)
453
454
455# TODO: can this be implemented query_atlas(organism="...", Locuslink="...", Chebi="...", up3InCompound="..." downInEFO="...")
456# Need a full list of accepted factors
457
458
def query_atlas(condition, format="json", start=None, rows=None, indent=False, connection=None):
    """ Query Atlas based on a `condition` (instance of AtlasCondition)

    :param condition: The query condition.
    :param format: Response format; "json" is parsed with `parse_json`,
        anything else with `parse_xml`.
    :param start: Passed to the connection's `query` as `start`.
    :param rows: Passed to the connection's `query` as `rows`.
    :param indent: Passed to the connection's `query` as `indent`.
    :param connection: Connection to use. If None a new
        `GeneExpressionAtlasConenction` with default settings is created.

    Raises GeneAtlasError if the parsed response contains an error.

    Example ::

        >>> condition1 = AtlasConditionGeneProperty("Goterm", "Is", "p53 binding")
        >>> condition2 = AtlasConditionExperimentalFactor("Organism_part", "up", 3, "heart")
        >>> condition = AtlasConditionList([condition1, condition2])
        >>> query_atlas(condition)
        {u'...

    """
    # stacklevel=2 attributes the warning to the caller of this function.
    warnings.warn("Use 'obiGeneAtlas.run_query' instead.",
                  DeprecationWarning, stacklevel=2)
    if connection is None:
        connection = GeneExpressionAtlasConenction()
    results = connection.query(condition, format=format, start=start,
                               rows=rows, indent=indent)
    if format == "json":
        response = parse_json(results)
        return __check_atlas_error_json(response)
    else:
        response = parse_xml(results)
        return __check_atlas_error_xml(response)
482
483
def get_atlas_summary(genes, organism, connection=None):
    """ Return 3 dictionaries containing a summary of atlas information
    about three experimental factors:

        - Organism Part (OP)
        - Disease State (DS)
        - Cell type (CT)

    Each dictionary contains query genes as keys. Values are dictionaries
    mapping factor values to a 2-tuple containing the count of up regulated
    and down regulated experiments.

    :param genes: Gene name, or list of gene names, to query.
    :param organism: Organism name (one of `ATLAS_ORGANISMS`).
    :param connection: Connection passed on to `query_atlas`.

    Example ::

        >>> get_atlas_summary(["RUNX1"], "Homo sapiens")
        ({u'RUNX1': ...

    """
    # stacklevel=2 attributes the warning to the caller of this function.
    warnings.warn("Use 'obiGeneAtlas.get_atlas_summary' instead.",
                  DeprecationWarning, stacklevel=2)
    genes_condition = AtlasConditionGeneProperty("Gene", "Is", genes)
    org_condition = AtlasConditionOrganism(organism)
    condition = AtlasConditionList([genes_condition, org_condition])
    result = query_atlas(condition, format="json", connection=connection)

    org_part = collect_ef_summary(result, "organism_part")
    disease_state = collect_ef_summary(result, "disease_state")
    cell_type = collect_ef_summary(result, "cell_type")

    # Plain dicts are returned rather than the collected summary mappings.
    return dict(org_part), dict(disease_state), dict(cell_type)
513
514
def collect_ef_summary(info, ef, summary=None):
    """ Collect the results summary from query_atlas, result for experimental
    factor `ef`.

    :param info: Parsed query result; a mapping with a "results" list whose
        items have a "gene" mapping (with a "name") and an "expressions"
        list.
    :param ef: Name of the experimental factor to collect
        (e.g. "organism_part").
    :param summary: Optional mapping to update in place. Any mutable
        mapping works, including a plain dict. If None a new
        `defaultdict(dict)` is created.

    Returns `summary`, mapping gene name to a dict of factor value ->
    (up experiments, down experiments). Factor values where both counts
    are zero are skipped.
    """
    if summary is None:
        summary = defaultdict(dict)

    for res in info["results"]:
        gene_name = res["gene"]["name"]
        for expression in res["expressions"]:
            if expression["ef"] != ef:
                continue
            updown = (expression["upExperiments"],
                      expression["downExperiments"])
            if any(updown):
                # setdefault (rather than relying on defaultdict) so that a
                # caller supplied plain dict does not raise KeyError.
                summary.setdefault(gene_name, {})[expression["efv"]] = updown

    return summary
Note: See TracBrowser for help on using the repository browser.