Changeset 1711:643d37885055 in orange-bioinformatics


Ignore:
Timestamp:
08/17/12 15:36:10 (20 months ago)
Author:
markotoplak
Branch:
default
rebase_source:
89e6b1f907eeaa5970e4e1b4ce03b3fc8ef6995e
Message:

obiGeneSetSig: speedup of ASSESS.

Location:
_bioinformatics
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • _bioinformatics/obiGeneSetSig.py

    r1709 r1711  
    217217        if self.rankingf == None: 
    218218            self.rankingf = AT_edelmanParametricLearner() 
     219        self.example_buffer = {} 
     220        self.attransv = 0 
    219221        super(Assess, self).__init__(**kwargs) 
     222 
     223    def _ordered_and_lcor(self, ex, nm, name_ind, attrans, attransv): 
     224        """ Buffered! It should be computed only once per example. """  
     225        #name_ind and nm are always co-created, so I need to have only one as a key 
     226        key = (ex, nm, attransv) 
     227        if key not in self.example_buffer: 
     228            ex_atts = [ at.name for at in ex.domain.attributes ] 
     229            new_atts = [ name_ind[nm.umatch(an)] if nm.umatch(an) != None else None 
     230                for an in ex_atts ] 
     231 
     232            #new_atts: indices of genes in original data for that sample  
     233            #POSSIBLE REVERSE IMPLEMENTATION (slightly different 
     234            #for data from different chips): 
     235            #save pairs together and sort (or equiv. dictionary transformation) 
     236 
     237            indexes = filter(lambda x: x[0] != None, zip(new_atts, range(len(ex_atts)))) 
     238 
     239            lcor = [ attrans[index_in_data](ex[index_in_ex].value)  
     240                for index_in_data, index_in_ex in indexes if 
     241                ex[index_in_ex].value != '?' ] 
     242            #indexes in original lcor, sorted from higher to lower values 
     243            ordered = obiGsea.orderedPointersCorr(lcor) 
     244            rev2 = numpy.argsort(ordered) 
     245            self.example_buffer[key] = lcor,ordered,rev2 
     246        return self.example_buffer[key] 
    220247 
    221248    def build_features(self, data, gene_sets): 
     
    225252        #attrans: { i_orig: ranking_function } 
    226253        attrans = [ self.rankingf(iat, data) for iat, at in enumerate(data.domain.attributes) ] 
     254        attransv = self.attransv 
     255        self.attransv += 1 
    227256 
    228257        nm_all, _ =  self._mat_ni(data) 
     
    234263            geneset = list(gs.genes) 
    235264            nm, name_ind, genes, takegenes, to_geneset = self._match_data(data, geneset, odic=True) 
     265            takegenes = [ geneset[i] for i in takegenes ] 
    236266            genes = set(genes) 
    237              
    238             def t(ex, w, geneset=geneset, takegenes=takegenes, nm=nm, attrans=attrans): 
    239  
    240                 nm2, name_ind2, genes2 = self._match_instance(ex, geneset, takegenes) 
    241  
    242                 ex_atts = [ at.name for at in ex.domain.attributes ] 
    243                 new_atts = [ name_ind[nm.umatch(an)] if nm.umatch(an) != None else None 
    244                     for an in ex_atts ] 
    245                 #new_atts: indices of genes in original data for that sample  
    246                 #POSSIBLE REVERSE IMPLEMENTATION (slightly different 
    247                 #for data from different chips): 
    248                 #save pairs together and sort (or equiv. dictionary transformation) 
    249  
    250                 indexes = filter(lambda x: x[0] != None, zip(new_atts, range(len(ex_atts)))) 
    251  
    252                 lcor = [ attrans[index_in_data](ex[index_in_ex].value)  
    253                     for index_in_data, index_in_ex in indexes if 
    254                     ex[index_in_ex].value != '?' ] 
    255                 #indexes in original lcor, sorted from higher to lower values 
    256                 ordered = obiGsea.orderedPointersCorr(lcor)  
     267 
     268            def t(ex, w, takegenes=takegenes, nm=nm, attrans=attrans, attransv=attransv): 
     269 
     270                nm2, name_ind2, genes2 = self._match_instance(ex, takegenes) 
     271                lcor, ordered, rev2 = self._ordered_and_lcor(ex, nm, name_ind, attrans, attransv) 
     272 
    257273                #subset = list of indices, lcor = correlations, ordered = order 
    258274                subset = [ name_ind2[g] for g in genes2 ] 
    259                 return obiGsea.enrichmentScoreRanked(subset, lcor, ordered)[0]  
     275                return obiGsea.enrichmentScoreRanked(subset, lcor, ordered, rev2=rev2)[0]  
    260276 
    261277            at.get_value_from = t 
  • _bioinformatics/obiGsea.py

    r1706 r1711  
    4747    """ 
    4848    ordered = [ (i,a) for i,a in enumerate(lcor) ] #original pos + correlation 
    49     ordered.sort(lambda x,y: cmp(y[1],x[1])) #sort by correlation, descending 
     49    ordered.sort(key=lambda x: -x[1]) #sort by correlation, descending 
    5050    ordered = nth(ordered, 0) #contains positions in the original list 
    5151    return ordered 
     
    7777    notInA = -(1. / (len(lcor)-len(subset))) 
    7878    #base for addition if gene is in the subset 
    79     cors = [ abs(lcor[i])**p for i in subset ] 
     79 
     80    cors = [ abs(lcor[i])**p for i in subset ] #below in numpy 
    8081    sumcors = sum(cors) 
    8182 
Note: See TracChangeset for help on using the changeset viewer.