Changeset 1362:5f56b6ad5f8f in orange-bioinformatics


Ignore:
Timestamp:
04/08/11 16:01:46 (3 years ago)
Author:
markotoplak
Branch:
default
Convert:
16c81910348d6448f7faa58033c3f0fd45248055
Message:

distance computation functions.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • widgets/prototypes/OWGenotypeDistances.py

    r1361 r1362  
    88 
    99from collections import defaultdict 
     10import numpy 
     11import math 
    1012 
    1113def separate_by(data, separate, ignore=[], consider=None, add_empty=True): 
     
    7476         
    7577    return ngroups 
     78 
     79def float_or_none(value): 
     80    return value.value if value.value != "?" else None 
     81 
     82def linearize(data, ids): 
     83    """ Returns a list of floats in the data subspace (or None's 
     84    if the values are unknown or not present. """ 
     85    l = [ [ None ] * len(data) if id1 == None \ 
     86        else [ float_or_none(ex[id1]) for ex in data ] for id1 in ids ] 
     87    l = reduce(lambda x,y: x+y, l) 
     88    return l 
     89 
     90def pearson_lists(l1, l2): 
     91    """ Returns pearson correlation between two lists. Ignores elements 
     92    which are None.""" 
     93    okvals = [ (a,b) for a,b in zip(l1,l2) if a != None and b != None ] 
     94    return numpy.corrcoef([ [ v[0] for v in okvals], [ v[1] for v in okvals] ])[0,1] 
     95 
     96def euclidean_lists(l1, l2): 
     97    """ Returns pearson correlation between two lists. Ignores elements 
     98    which are None.""" 
     99    okvals = [ (a,b) for a,b in zip(l1,l2) if a != None and b != None ] 
     100    return math.sqrt( sum((a-b)*(a-b) for a,b in okvals )) 
     101 
     102def dist_pcorr(l1, l2): 
     103    #normalized to 0..1 
     104    return (1-pearson_lists(l1, l2))/2 
     105 
     106def dist_eucl(l1, l2): 
     107    return euclidean_lists(l1, l2) 
    76108 
    77109class MyHeaderView(QHeaderView): 
     
    358390        #Compute distances here 
    359391         
    360          
    361 if __name__ == "__main__": 
     392data = Orange.data.Table("tmp.tab") 
     393partitions = separate_by(data, [ "genotype" ], consider=["tp", "replicate"]).items() 
     394print partitions 
     395l1 = linearize(data, partitions[0][1]) 
     396l2 = linearize(data, partitions[1][1]) 
     397print  dist_eucl(l1, l2) 
     398print  dist_pcorr(l1, l2) 
     399 
     400 
     401if __name__ == "__main1__": 
    362402    import os, sys 
    363403    app = QApplication(sys.argv ) 
Note: See TracChangeset for help on using the changeset viewer.