Changeset 936:5878bd4b8bfd in orange-bioinformatics for obiGsea.py
- Timestamp: 07/28/09 14:13:25 (4 years ago)
- Branch: default
- Convert: 1ef64a4c36f201cbd5edbe5c3a297b59e246326b
- File: 1 edited
  - obiGsea.py (modified) (5 diffs)
Legend:
- Unmodified
- Added
- Removed
obiGsea.py
r934 r936 673 673 and names of genes as attribute names. 674 674 675 If phenVar is False, then we can work, then the input already 676 consists of scores of differential expressions 677 675 678 If we have a single column, transpose it. 676 679 If phenVar is one of the groups, transpose the matrix. 677 680 """ 678 681 679 def transpose_data(data): 680 columns = [a for a in data.domain] + [ data.domain.getmeta(a) for a in list(data.domain.getmetas()) ] 681 floatvars = [ a for a in columns if a.varType == orange.VarTypes.Continuous ] 682 if len(floatvars) == 1: 683 floatvar = floatvars[0] 684 stringvar = [ a for a in columns if a.varType == 6 ][0] 685 686 tup = [ (ex[stringvar].value, ex[floatvar].value) for ex in data ] 687 newdom = orange.Domain([orange.FloatVariable(name=a[0]) for a in tup ], False) 688 example = [ a[1] for a in tup ] 689 ndata = orange.ExampleTable(newdom, [example]) 690 return ndata 691 return data 692 693 #transform every example table example tables 694 682 def prepare_data(data, phenVar=None, geneVar=None): 683 684 def rorq(a, name): 685 """ Group annatation or question mark. """ 686 try: 687 return a.attributes[name] 688 except: 689 return '?' 690 691 #use class as phenotype by default, if it is present, 692 #if not, do not use any phenotype! 693 if phenVar == None: 694 if not data.domain.classVar: 695 phenVar = False 696 else: 697 phenVar = data.domain.classVar 698 699 700 #TODO validate phenVar and geneVar? 701 #TODO autodetection of groups? 
702 703 #transpose is not needed if phenVar is classVar or phenVar is False 704 #and there is only one sample 705 if phenVar == data.domain.classVar or \ 706 (phenVar == False and len(data) == 1): 707 708 if geneVar == None: #if not specified, set as true in this stage 709 geneVar = True 710 711 floatvars = [ a for a in data.domain.attributes \ 712 if a.varType == orange.VarTypes.Continuous ] 713 714 #rename attributes without touching the original variable 715 if geneVar != True: 716 fl2 = [] 717 718 for a in floatvars: 719 na = orange.FloatVariable(name=rorq(a, geneVar)) 720 na.getValueFrom = lambda e, rw: e[a] 721 fl2.append(na) 722 723 floatvars = fl2 724 725 dom = orange.Domain(floatvars, phenVar) 726 return orange.ExampleTable(dom, data) 727 728 elif phenVar == False or phenVar != data.domain.classVar: 729 730 cands = allgroups(data) 731 pv = False 732 if phenVar != False: 733 pv = orange.EnumVariable(name="phenotype", 734 values=list(cands[phenVar])) 735 736 #take the only string attribute as a gene name 737 gc = gene_cands(data, False) 738 if geneVar == None: 739 if len(gc) == 1: 740 geneVar = gc[0] 741 else: 742 geneNamesUnspecifiedError() 743 744 latts = [ orange.FloatVariable(name=ex[geneVar].value) \ 745 for ex in data ] 746 747 domain = orange.Domain(latts, pv) 748 749 examples = [] 750 for at in data.domain.attributes: 751 if at.varType == orange.VarTypes.Continuous: 752 vals = [ ex[at].value for ex in data ] 753 if pv != False: #add class value 754 vals.append(rorq(at, phenVar)) 755 examples.append(orange.Example(domain, vals)) 756 757 return orange.ExampleTable(domain, examples) 758 else: 759 wrongInputsError() 760 761 #transform all example tables 695 762 single = iset(data) 696 transposed = [ transpose_data(d) for d in wrap_in_list(data) ]763 transposed = [ prepare_data(d, phenVar, geneVar) for d in wrap_in_list(data) ] 697 764 698 765 if single: … … 725 792 return filter(lambda x: len(x[1]) >= 2, cands) 726 793 727 def gene_cands(data, 
phenVar):794 def gene_cands(data, correct): 728 795 """ 729 796 Returns all valid gene descriptors with regards to the choosen … … 732 799 descriptions in attr.attributes and True for the usage 733 800 of attribute names. 734 """ 735 if is_variable(phenVar[0]): 801 Correct is True, if the example table has genes as attributes. 802 """ 803 if correct: 736 804 #gene names could be in attributes or as gene names (marker True) 737 805 return [True] + nth(sorted(allgroups(data)),0) … … 894 962 """ 895 963 gso = GSEA(data, organism=organism, matcher=matcher, 896 classValues=classValues, atLeast=atLeast, caseSensitive=caseSensitive) 964 classValues=classValues, atLeast=atLeast, caseSensitive=caseSensitive, 965 geneVar=geneVar, phenVar=phenVar) 897 966 if geneSets == None: 898 967 genesets = collections(default=True) 899 968 gso.addGenesets(geneSets) 900 969 res1 = gso.compute(n=n, permutation=permutation, minSize=minSize, 901 maxSize=maxSize, minPart=minPart, geneVar=geneVar, phenVar=phenVar, 902 **kwargs) 970 maxSize=maxSize, minPart=minPart, **kwargs) 903 971 return res1 904 972 … … 972 1040 if __name__=="__main__": 973 1041 974 data = orange.ExampleTable("sterolTalkHepaM.tab") 975 print phenotype_cands(data) 976 print is_variable(phenotype_cands(data)[0][0]) 977 978 """ 1042 #data = orange.ExampleTable("sterolTalkHepa.tab") 1043 979 1044 data = orange.ExampleTable("gene_three_lines_log.tab") 980 print phenotype_cands(data)981 print is_variable(phenotype_cands(data)[0][0])982 """983 1045 984 1046 gen1 = collections(['steroltalk.gmt', ':kegg:hsa'], default=False) 985 986 gen1 = dict([ ('[KEGG] Complement and coagulation cascades', gen1['[KEGG] Complement and coagulation cascades'])]) 1047 #gen1 = dict([ ('[KEGG] Complement and coagulation cascades', gen1['[KEGG] Complement and coagulation cascades'])]) 987 1048 988 1049 rankingf = rankingFromOrangeMeas(MA_anova()) 1050 989 1051 matcher = obiGene.matcher([obiGene.GMKEGG('hsa')]) 990 1052 991 out = runGSEA(data, n=10, 
geneSets=gen1, permutation="gene", atLeast=3, matcher=matcher, rankingf=rankingf) 1053 #out = runGSEA(data, n=10, geneSets=gen1, permutation="gene", atLeast=3, matcher=matcher, rankingf=rankingf) 1054 1055 geneVar = gene_cands(data, False)[1] 1056 out = runGSEA(data, n=10, geneSets=gen1, permutation="gene", atLeast=3, matcher=matcher, rankingf=rankingf, phenVar="group", geneVar=geneVar) 1057 992 1058 print "\n".join(map(str,sorted(out.items()))) 993 1059
Note: See TracChangeset for help on using the changeset viewer.
