Changeset 1573:f7cad4491971 in orange-bioinformatics


Ignore:
Timestamp:
02/20/12 17:25:19 (2 years ago)
Author:
markotoplak
Branch:
default
rebase_source:
3352d103b5f3990e295602c2ef23eafb74205f5b
Message:

Deduplicated code from obiExperiments in OWGenotypeDistances. OWQualityControl: python 2.6 support.

Location:
widgets
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • widgets/OWGenotypeDistances.py

    r1555 r1573  
    1616import math 
    1717 
    18 def data_type(vals): 
    19     try: 
    20         _ = [ int(a) for a in vals ] 
    21         return int 
    22     except: 
    23         try: 
    24             _ = [ float(a) for a in vals ] 
    25             return float 
    26         except: 
    27             return lambda x: x 
    28  
    29 def separate_by(data, separate, ignore=[], consider=None, add_empty=True): 
    30     """ 
    31     data - the data - annotations are saved in the at.attributes 
    32     separate: keys of at.attributes by which to separate 
    33     ignore: ignore values of these annotations 
    34     consider: consider only these annotations 
    35     """ 
    36     ignore = set(ignore) 
    37  
    38     annotations = [ at.attributes for at in data.domain.attributes ] 
    39  
    40     all_values = defaultdict(set) 
    41     for a in annotations: 
    42         for k,v in a.iteritems(): 
    43             all_values[k].add(v) 
    44  
    45     types = {} 
    46     for k,vals in all_values.iteritems(): 
    47         types[k] = data_type(vals) 
    48      
    49     groups = defaultdict(list) 
    50     for i,a in enumerate(annotations): 
    51         groups[tuple(a[k] for k in separate)].append(i) 
    52  
    53     different_in_all = set(k \ 
    54         for k,vals in all_values.iteritems() \ 
    55         if len(vals) == len(annotations) or len(vals) == 1) 
    56  
    57     other_relevant = set(all_values.keys()) - different_in_all - ignore - set(separate) 
    58     if consider != None: 
    59         other_relevant &= set(consider) 
    60     other_relevant = sorted(other_relevant) #TODO how to order them? 
    61  
    62     def relevant_vals(annotation): 
    63         if isinstance(annotation, tuple): 
    64             return annotation 
    65         return tuple(types[v](annotation[v]) for v in other_relevant) 
    66  
    67     other_relevant_d2 = defaultdict(int) #"multiset" - number 
    68     #of maximum occurrences of a relevant value in a group 
    69     for _,g in groups.items(): 
    70         d = defaultdict(int) 
    71         for i in g: 
    72             d[relevant_vals(annotations[i])] += 1 
    73         for rv,n in d.items(): 
    74             if n > other_relevant_d2[rv]: 
    75                 other_relevant_d2[rv] = n 
    76      
    77     if add_empty: #fill in with "empty" relevant vals 
    78         ngroups = {} 
    79         for g in groups: 
    80             need_to_fill = other_relevant_d2.copy() 
    81             for i in groups[g]: 
    82                 need_to_fill[relevant_vals(annotations[i])] -= 1 
    83             add = [] 
    84             for rv,num in need_to_fill.items(): 
    85                 for a in range(num): 
    86                     add.append(rv) 
    87             ngroups[g] = groups[g] + add 
    88         groups = ngroups 
    89  
    90     ngroups = {} 
    91     uniquepos = {} #which positions are unique 
    92     for g in groups: 
    93         elements = list(groups[g]) 
    94  
    95         rv2 = lambda x: relevant_vals(annotations[x] if isinstance(x,int) else x) 
    96  
    97         ngroups[g] = map(lambda x: x if isinstance(x,int) else None, 
    98             sorted(elements, key=rv2)) 
    99  
    100         d = defaultdict(int) #get groups of different relevant values 
    101         for i in elements: 
    102             d[rv2(i)] += 1 
    103          
    104         uniquepos[g] = map(lambda x: not d[rv2(x)] > 1, 
    105              sorted(elements, key=rv2)) 
    106      
    107     return ngroups, uniquepos 
    108  
    109 def float_or_none(value): 
    110     return value.value if value.value != "?" else None 
    111  
    112 def linearize(data, ids): 
    113     """ Returns a list of floats in the data subspace (or None's 
    114     if the values are unknown or not present). """ 
    115     l = [ [ None ] * len(data) if id1 == None \ 
    116         else [ float_or_none(ex[id1]) for ex in data ] for id1 in ids ] 
    117     l = reduce(add, l) 
    118     return l 
    119  
    120 def pearson_lists(l1, l2): 
    121     """ Returns pearson correlation between two lists. Ignores elements 
    122     which are None.""" 
    123     okvals = [ (a,b) for a,b in zip(l1,l2) if a != None and b != None ] 
    124     return numpy.corrcoef([ [ v[0] for v in okvals], [ v[1] for v in okvals] ])[0,1] 
    125  
    126 def euclidean_lists(l1, l2): 
    127     """ Returns euclidean distance between two lists. Ignores elements 
    128     which are None.""" 
    129     okvals = [ (a,b) for a,b in zip(l1,l2) if a != None and b != None ] 
    130     return math.sqrt( sum((a-b)*(a-b) for a,b in okvals )) 
    131  
    132 def spearman_lists(l1, l2): 
    133     """ Returns spearman rank correlation between two lists. Ignores elements 
    134     which are None.""" 
    135     import scipy.stats 
    136     okvals = [ (a,b) for a,b in zip(l1,l2) if a != None and b != None ] 
    137     #print okvals, len(okvals) 
    138     return scipy.stats.spearmanr([ v[0] for v in okvals], [ v[1] for v in okvals] )[0] 
    139  
    140 def dist_spearman(l1, l2): 
    141     return (1.-spearman_lists(l1, l2))/2 
    142  
    143 def dist_pcorr(l1, l2): 
    144     #normalized to 0..1 
    145     return (1.-pearson_lists(l1, l2))/2 
    146  
    147 def dist_eucl(l1, l2): 
    148     return euclidean_lists(l1, l2) 
    149  
     18from obiExperiments import separate_by, data_type, linearize, dist_pcorr, dist_eucl, dist_spearman 
    15019 
    15120def clone_attr(attr): 
  • widgets/OWQualityControl.py

    r1567 r1573  
    5454    """Return group label. 
    5555    """ 
    56     labels = ["{}={}".format(split, group) \ 
     56    labels = ["%s=%s" % (split, group) \ 
    5757              for split, group in zip(splits, groups)] 
    5858    return " | ".join(labels) 
     
    6464    items = [(key, attr.attributes.get(key, "?")) \ 
    6565             for key in sort] 
    66     labels = ["{}={}".format(*item) for item in items] 
     66    labels = ["%s=%s" % tuple(item) for item in items] 
    6767    return " | ".join(labels) 
    6868 
     
    282282         
    283283        self.update_label_candidates() 
    284         self.info_box.setText("{} genes \n{} experiments".format( 
     284        self.info_box.setText("%s genes \n%s experiments" % ( 
    285285                                len(self.data),  
    286286                                len(self.data.domain.attributes) 
Note: See TracChangeset for help on using the changeset viewer.