Changeset 1772:58718fc3f736 in orange-bioinformatics


Ignore:
Timestamp:
05/06/13 20:57:45 (12 months ago)
Author:
markotoplak
Branch:
default
Message:

obiGeneSetSig: CORGs were built one gene too large (fixed). SetSig now has an option (check_same) to check whether two examples are exactly alike and, if so, to discard the distance between them.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • _bioinformatics/obiGeneSetSig.py

    r1728 r1772  
    7474            # while the transformed data table uses cross-validation 
    7575            # internally 
    76             folds = 5 
    77             cvi = Orange.data.sample.SubsetIndicesCV(data, folds) 
     76            if self.cv == True: 
     77                cvi = Orange.data.sample.SubsetIndicesCV(data, 5) 
     78            elif self.cv != False: 
     79                cvi = self.cv(data) 
    7880            data_cv = [ [] for _ in range(len(data)) ] 
    79             for f in range(folds): 
     81            for f in set(cvi): 
    8082                learn = data.select(cvi, f, negate=True) 
    8183                test = data.select(cvi, f) 
     
    289291        return attributes 
    290292    
    291 def setSig_example_geneset(ex, data, no_unknowns): 
     293def setSig_example_geneset(ex, data, no_unknowns, check_same=False): 
    292294    """ Gets learning data and example with the same domain, both 
    293295    containing only genes from the gene set. """ 
     
    298300        vals1 = ex1.native(0)[:-1] 
    299301        vals2 = ex2.native(0)[:-1] 
     302 
     303        if check_same and vals1 == vals2: 
     304            return 10 #they are the same 
    300305 
    301306        #leaves undefined elements out 
     
    324329    #create distances to all learning data - save or other class 
    325330    for c in data: 
    326         distances[classValueMap[c[-1].value]].append(pearson(c, ex)) 
     331        p = pearson(c, ex) 
     332        if p != 10: 
     333             distances[classValueMap[c[-1].value]].append(pearson(c, ex)) 
    327334 
    328335    return ttest(distances[0], distances[1]) 
     
    358365    def __init__(self, **kwargs): 
    359366        self.no_unknowns = kwargs.pop("no_unknowns", False) 
     367        self.check_same = kwargs.pop("check_same", False) 
    360368        super(SetSig, self).__init__(**kwargs) 
    361369 
     
    378386            example = Orange.data.Instance(domain, exvalues) 
    379387 
    380             return setSig_example_geneset(example, datao, self.no_unknowns) #only this one is setsig specific 
     388            return setSig_example_geneset(example, datao, self.no_unknowns, check_same=self.check_same) #only this one is setsig specific 
    381389      
    382390        at.get_value_from = t 
     
    412420 
    413421            return self._use_par(ex, constructt) 
    414              
     422         
    415423        at.get_value_from = t 
     424        at.dbg = constructt #for debugging 
     425         
    416426        return at 
    417427 
     
    569579    for a in range(2, len(sortedinds)+1): 
    570580        tg = S(sortedinds[:a]) 
    571         if tg > g: 
     581        if tg > g: #improvement 
    572582            g = tg 
    573583            bg = a 
     
    575585            break 
    576586         
    577     return sortedinds[:a] 
     587    return sortedinds[:bg] #FIXED - one too many was taken 
    578588 
    579589class CORGs(ParametrizedTransformation): 
Note: See TracChangeset for help on using the changeset viewer.