Changeset 1600:60f301f74126 in orange-bioinformatics for obiGeneSetSig.py


Ignore:
Timestamp:
03/14/12 12:48:30 (2 years ago)
Author:
markotoplak
Branch:
default
Message:

Added CORGs to obiGeneSetSig.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • obiGeneSetSig.py

    r1599 r1600  
    77import stats 
    88from obiGsea import takeClasses 
    9 from obiAssess import pca, PLSCall 
     9from obiAssess import pca, PLSCall, corgs_activity_score 
    1010import obiExpression 
    1111import scipy.stats 
    1212 
    13 #STILL MISSING: Assess, CORGs 
     13#STILL MISSING: Assess 
    1414 
    1515def setSig_example_geneset(ex, data): 
     
    300300        return attributes 
    301301 
     302def tscorec(data, at, cache=None): 
     303    """ Cached attribute  tscore calculation """ 
     304    if cache != None and at in cache: return cache[at] 
     305    ma = obiExpression.MA_t_test()(at,data) 
     306    if cache != None: cache[at] = ma 
     307    return ma 
     308 
     309def nth(l, n): 
     310    return [a[n] for a in l] 
     311 
     312def compute_corg(data, inds, tscorecache): 
     313    """ 
     314    Compute CORG for this geneset specified with gene inds 
     315    in the example table. Output is the list of gene inds 
     316    in CORG. 
     317 
     318    """ 
     319    #order member genes by their t-scores: decreasing, if av(t-score) >= 0, 
     320    #else increasing 
     321    tscores = [ tscorec(data, at, tscorecache) for at in inds ] 
     322    sortedinds = nth(sorted(zip(inds,tscores), key=lambda x: x[1], \ 
     323        reverse=numpy.mean(tscores) >= 0), 0) 
     324 
     325    def S(corg): 
     326        """ Activity score separation - S(G) in  
     327        the article """ 
     328        asv = Orange.feature.Continuous(name='AS') 
     329        asv.getValueFrom = lambda ex,rw: Orange.data.Value(asv, corgs_activity_score(ex, corg)) 
     330        data2 = Orange.data.Table(Orange.data.Domain([asv], data.domain.classVar), data) 
     331        return abs(tscorec(data2, 0)) #FIXME absolute - nothing in the article abs() 
     332             
     333    #greedily find CORGS procing the best separation 
     334    g = S(sortedinds[:1]) 
     335    bg = 1 
     336    for a in range(2, len(sortedinds)+1): 
     337        tg = S(sortedinds[:a]) 
     338        if tg > g: 
     339            g = tg 
     340            bg = a 
     341        else: 
     342            break 
     343         
     344    return sortedinds[:a] 
     345 
     346class CORGs(ParametrizedTransformation): 
     347    """ 
     348    WARNING: input has to be z_ij table! each gene needs to be normalized 
     349    (mean=0, stdev=1) for all samples. 
     350    """ 
     351 
     352    def __call__(self, *args, **kwargs): 
     353        self.tscorecache = {} #reset a cache 
     354        return super(CORGs, self).__call__(*args, **kwargs) 
     355 
     356    def build_feature(self, data, gs): 
     357 
     358        at = Orange.feature.Continuous(name=str(gs)) 
     359        geneset = list(gs.genes) 
     360 
     361        nm, name_ind, genes, takegenes, to_geneset = self._match_data(data, geneset, odic=True) 
     362        indices = compute_corg(data, [ name_ind[g] for g in genes ], self.tscorecache) 
     363 
     364        ind_names = dict( (a,b) for b,a in name_ind.items() ) 
     365        selected_genes = sorted(set([to_geneset[ind_names[i]] for i in indices])) 
     366             
     367        def t(ex, w, corg=selected_genes): #copy od the data 
     368            nm2, name_ind2, genes2 = self._match_instance(ex, corg, None) 
     369            exvalues = [ vou(ex, gn, name_ind2) for gn in genes2 ] 
     370            return sum(v if v != '?' else 0.0 for v in exvalues)/len(corg)**0.5 
     371      
     372        at.get_value_from = t 
     373        return at 
     374 
    302375if __name__ == "__main__": 
    303376 
     
    330403        print '\n'.join([ a + ": " +str(b) for a,b in ol]) 
    331404 
    332     ass = PCA(data, matcher=matcher, gene_sets=gsets, class_values=choosen_cv, min_part=0.0) 
     405    ass = CORGs(data, matcher=matcher, gene_sets=gsets, class_values=choosen_cv, min_part=0.0) 
    333406    ar = to_old_dic(ass.domain, data[:5]) 
    334407    pp2(ar) 
Note: See TracChangeset for help on using the changeset viewer.