source: orange-bioinformatics/_bioinformatics/obiGeneAtlasSets.py @ 1827:d9c6f60c5632

Revision 1827:d9c6f60c5632, 2.8 KB checked in by Flashpoint <vid.flashpoint@…>, 10 months ago (diff)

Removed the select/unselect all gene sets check box from OWSetEnrichment.py widget

Line 
1#!/usr/bin/env python
2
3from collections import defaultdict
4from Orange.bio.obiGeneAtlas import run_simple_query
5from Orange.bio.obiGeneSets import GeneSets, GeneSet
6import time
7
def display_string(string):
    """Return *string* in display form.

    The text is capitalized (first character upper-cased, the rest
    lower-cased) and every underscore is then replaced by a space.
    """
    capitalized = string.capitalize()
    return capitalized.replace("_", " ")
10
# Settings shared by every Gene Expression Atlas query and by the gene sets
# built from the results.
regulation = "updown"      # passed as the `regulation` argument of each query
organism = "Homo sapiens"  # passed as the `organism` argument; also used in the set links
condition = "organism_part"  # passed as the `condition` argument; also names the hierarchy
org_code = "9606"          # organism code attached to every generated gene set
max_pvalue = 1e-5          # an experiment counts only if its p-value is <= this
16
# Short pre-query: only the reported total is needed here, so ask for just a
# handful of rows.
query = run_simple_query(
    regulation=regulation,
    organism=organism,
    condition=condition,
    start=0,
    rows=5,
)
total_genes = query["totalResults"]

# 50 genes per request: the HTTP requests don't seem stable with more than
# 50 genes at a time (the Gene Expression Atlas API itself only permits
# access to [50,200] genes at a time).
no_of_genes = 50
# Number of *full* pages (floor division).
no_of_pages = total_genes // no_of_genes

print(total_genes)
25
# Download the query results page by page and group gene names by the
# experimental factor / factor value in which they are significantly
# differentially expressed.
i = 0  # running count of processed genes, shown in the debug output
sets = defaultdict(list)  # (factor, factor value) -> list of gene names

# `no_of_pages` counts full pages only, hence the `+ 1` for the last,
# partially filled page.
for page in range(no_of_pages + 1):
    # The timer gets its own name. Previously it was stored in the loop
    # variable itself, so the `start=` offset sent to the service was derived
    # from a timestamp instead of from the page index.
    page_started = time.time()
    query = run_simple_query(regulation=regulation, organism=organism,
                             condition=condition, start=page * no_of_genes,
                             rows=no_of_genes)
    for result in query["results"]:
        i += 1
        gene_name = result["gene"]["name"]
        # Gene name and running count, printed for debugging.
        print(gene_name + "\t" + str(i))
        for exp in result["expressions"]:
            # Use only genes that are significantly diff. expressed in at
            # least one experiment of this expression entry.
            if not any(e["pvalue"] <= max_pvalue for e in exp["experiments"]):
                continue
            # The Gene Expression Atlas entries are not consistent: fall back
            # to the EFO fields when "ef"/"efv" are missing. Only a missing
            # key triggers the fallback; any other error propagates.
            try:
                key = exp["ef"], exp["efv"]
            except KeyError:
                key = exp["efoTerm"], exp["efoId"]
            sets[key].append(gene_name)
    # Seconds spent on this page.
    print(time.time() - page_started)
43       
# Turn every (factor, factor value) group into a GeneSet and collect them.
gene_sets = []

# These values are the same for every gene set, so compute them once.
organism_param = "+".join(organism.lower().split())
hierarchy = ("Gene Expression Atlas", display_string(condition))
link_template = "http://www.ebi.ac.uk/gxa/qrs?specie_0={organism}&gprop_0=&gnot_0=&gval_0=&fact_1=&fexp_1=UPDOWN&fmex_1=&fval_1=%22{efv}%22+&view=hm"

for (ef, efv), genes in sets.items():
    # The same sentence serves as both the set name and its description.
    title = "Diff. expressed in %s=%s." % (display_string(ef), efv)
    link = link_template.format(organism=organism_param,
                                efv="+".join(efv.lower().split()))
    gene_sets.append(
        GeneSet(genes, title, id=ef + ":" + efv, description=title, link=link,
                organism=org_code, hierarchy=hierarchy)
    )

final_set = GeneSets(gene_sets)

print(final_set)
# NOTE(review): this exit() ends the script here, so nothing after this point
# is executed. Presumably left in to avoid uploading while testing - confirm
# before removing.
exit()
57
58from Orange.bio.obiGeneSets import register
59import Orange.utils.serverfiles as serverfiles
60import sys
61
# Register the generated gene sets on the server.
# Usage: argv[1] = username, argv[2] = password.
try:
    username, password = sys.argv[1], sys.argv[2]
except IndexError:
    # Only a missing command-line argument is a usage error. Failures raised
    # while creating the ServerFiles object are no longer reported as a usage
    # problem; they propagate with their own traceback.
    print("argv[1] = username, argv[2] = password")
    exit()

sf_server = serverfiles.ServerFiles(username, password)

# Register each part returned by split_by_hierarchy() separately.
set_split = final_set.split_by_hierarchy()

for s in set_split:
    register(s, sf_server)

print("Gene sets successfully registered...")
Note: See TracBrowser for help on using the repository browser.