Changeset 936:5878bd4b8bfd in orange-bioinformatics for obiGsea.py


Ignore:
Timestamp:
07/28/09 14:13:25 (4 years ago)
Author:
markotoplak
Branch:
default
Convert:
1ef64a4c36f201cbd5edbe5c3a297b59e246326b
Message:

obiGsea: transposing of data sets works. The widget and documentation are not done yet.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • obiGsea.py

    r934 r936  
    673673    and names of genes as attribute names. 
    674674 
     675    If phenVar is False, no phenotype is used: the input is assumed to 
     676    already consist of scores of differential expression. 
     677 
    675678    If we have a single column, transpose it.  
    676679    If phenVar is one of the groups, transpose the matrix. 
    677680    """ 
    678681 
    679     def transpose_data(data): 
    680         columns = [a for a in data.domain] +  [ data.domain.getmeta(a) for a in list(data.domain.getmetas()) ] 
    681         floatvars = [ a for a in columns if a.varType == orange.VarTypes.Continuous ] 
    682         if len(floatvars) == 1: 
    683             floatvar = floatvars[0] 
    684             stringvar = [ a for a in columns if a.varType == 6 ][0] 
    685  
    686             tup = [ (ex[stringvar].value, ex[floatvar].value) for ex in data ] 
    687             newdom = orange.Domain([orange.FloatVariable(name=a[0]) for a in tup ], False) 
    688             example = [ a[1] for a in tup ] 
    689             ndata = orange.ExampleTable(newdom, [example]) 
    690             return ndata 
    691         return data 
    692  
    693     #transform every example table example tables 
    694  
     682    def prepare_data(data, phenVar=None, geneVar=None): 
     683 
     684        def rorq(a, name): 
     685            """ Group annotation or question mark. """ 
     686            try:  
     687                return a.attributes[name] 
     688            except:  
     689                return '?' 
     690    
     691        #use class as phenotype by default, if it is present, 
     692        #if not, do not use any phenotype! 
     693        if phenVar == None:  
     694            if not data.domain.classVar: 
     695                phenVar = False 
     696            else: 
     697                phenVar = data.domain.classVar 
     698 
     699 
     700        #TODO validate phenVar and geneVar? 
     701        #TODO autodetection of groups? 
     702 
     703        #transpose is not needed if phenVar is classVar or phenVar is False 
     704        #and there is only one sample 
     705        if phenVar == data.domain.classVar or \ 
     706            (phenVar == False and len(data) == 1): 
     707 
     708            if geneVar == None: #if not specified, set as true in this stage 
     709                geneVar = True 
     710 
     711            floatvars = [ a for a in data.domain.attributes \ 
     712                if a.varType == orange.VarTypes.Continuous ] 
     713 
     714            #rename attributes without touching the original variable 
     715            if geneVar != True: 
     716                fl2 = [] 
     717 
     718                for a in floatvars: 
     719                    na = orange.FloatVariable(name=rorq(a, geneVar)) 
     720                    na.getValueFrom = lambda e, rw: e[a] 
     721                    fl2.append(na) 
     722 
     723                floatvars = fl2 
     724 
     725            dom = orange.Domain(floatvars, phenVar) 
     726            return orange.ExampleTable(dom, data) 
     727 
     728        elif phenVar == False or phenVar != data.domain.classVar: 
     729 
     730            cands = allgroups(data) 
     731            pv = False 
     732            if phenVar != False: 
     733                pv = orange.EnumVariable(name="phenotype",  
     734                    values=list(cands[phenVar])) 
     735 
     736            #take the only string attribute as a gene name 
     737            gc = gene_cands(data, False) 
     738            if geneVar == None: 
     739                if len(gc) == 1: 
     740                    geneVar = gc[0] 
     741                else: 
     742                    geneNamesUnspecifiedError() 
     743            
     744            latts = [ orange.FloatVariable(name=ex[geneVar].value) \ 
     745                for ex in data ] 
     746 
     747            domain = orange.Domain(latts, pv) 
     748 
     749            examples = [] 
     750            for at in data.domain.attributes: 
     751                if at.varType == orange.VarTypes.Continuous: 
     752                    vals = [ ex[at].value for ex in data ] 
     753                    if pv != False: #add class value 
     754                        vals.append(rorq(at, phenVar)) 
     755                    examples.append(orange.Example(domain, vals)) 
     756 
     757            return orange.ExampleTable(domain, examples) 
     758        else: 
     759            wrongInputsError() 
     760 
     761    #transform all example tables 
    695762    single = iset(data) 
    696     transposed = [ transpose_data(d) for d in wrap_in_list(data) ] 
     763    transposed = [ prepare_data(d, phenVar, geneVar) for d in wrap_in_list(data) ] 
    697764 
    698765    if single: 
     
    725792    return filter(lambda x: len(x[1]) >= 2, cands) 
    726793 
    727 def gene_cands(data, phenVar): 
     794def gene_cands(data, correct): 
    728795    """ 
    729796    Returns all valid gene descriptors with regard to the chosen 
     
    732799    descriptions in attr.attributes and True for the usage 
    733800    of attribute names. 
    734     """ 
    735     if is_variable(phenVar[0]): 
     801    Correct is True, if the example table has genes as attributes. 
     802    """ 
     803    if correct: 
    736804        #gene names could be in attributes or as gene names (marker True) 
    737805        return [True] + nth(sorted(allgroups(data)),0) 
     
    894962    """ 
    895963    gso = GSEA(data, organism=organism, matcher=matcher,  
    896         classValues=classValues, atLeast=atLeast, caseSensitive=caseSensitive) 
     964        classValues=classValues, atLeast=atLeast, caseSensitive=caseSensitive, 
     965        geneVar=geneVar, phenVar=phenVar) 
    897966    if geneSets == None: 
    898967        genesets = collections(default=True) 
    899968    gso.addGenesets(geneSets) 
    900969    res1 = gso.compute(n=n, permutation=permutation, minSize=minSize, 
    901         maxSize=maxSize, minPart=minPart, geneVar=geneVar, phenVar=phenVar, 
    902         **kwargs) 
     970        maxSize=maxSize, minPart=minPart, **kwargs) 
    903971    return res1 
    904972 
     
    9721040if  __name__=="__main__": 
    9731041 
    974     data = orange.ExampleTable("sterolTalkHepaM.tab") 
    975     print phenotype_cands(data) 
    976     print is_variable(phenotype_cands(data)[0][0]) 
    977  
    978     """ 
     1042    #data = orange.ExampleTable("sterolTalkHepa.tab") 
     1043 
    9791044    data = orange.ExampleTable("gene_three_lines_log.tab") 
    980     print phenotype_cands(data) 
    981     print is_variable(phenotype_cands(data)[0][0]) 
    982     """ 
    9831045 
    9841046    gen1 = collections(['steroltalk.gmt', ':kegg:hsa'], default=False) 
    985  
    986     gen1 = dict([ ('[KEGG] Complement and coagulation cascades', gen1['[KEGG] Complement and coagulation cascades'])]) 
     1047    #gen1 = dict([ ('[KEGG] Complement and coagulation cascades', gen1['[KEGG] Complement and coagulation cascades'])]) 
    9871048 
    9881049    rankingf = rankingFromOrangeMeas(MA_anova()) 
     1050 
    9891051    matcher = obiGene.matcher([obiGene.GMKEGG('hsa')]) 
    9901052 
    991     out = runGSEA(data, n=10, geneSets=gen1, permutation="gene", atLeast=3, matcher=matcher, rankingf=rankingf) 
     1053    #out = runGSEA(data, n=10, geneSets=gen1, permutation="gene", atLeast=3, matcher=matcher, rankingf=rankingf) 
     1054 
     1055    geneVar = gene_cands(data, False)[1] 
     1056    out = runGSEA(data, n=10, geneSets=gen1, permutation="gene", atLeast=3, matcher=matcher, rankingf=rankingf, phenVar="group", geneVar=geneVar) 
     1057 
    9921058    print "\n".join(map(str,sorted(out.items()))) 
    9931059     
Note: See TracChangeset for help on using the changeset viewer.