Timestamp:
02/07/12 12:08:02 (2 years ago)
Author:
lanumek
Branch:
default
rebase_source:
1b75fef53affa71b6a971a946f2a5f27c1ad96a3
Message:

Test script for hierarchical clustering.

File:
1 edited

Legend:

Unchanged lines are shown with a leading space, removed lines with a leading "-", and added lines with a leading "+".
  • Orange/clustering/hierarchical.py (r9752 → r9906)


         :param matrix: A distance matrix to perform the clustering on.
-        :type matrix: :class:`Orange.core.SymMatrix`
+        :type matrix: :class:`Orange.misc.SymMatrix`


     

 Let us construct a simple distance matrix and run clustering on it.
-::
-
-    import Orange
-    from Orange.clustering import hierarchical
-    m = [[],
-         [ 3],
-         [ 2, 4],
-         [17, 5, 4],
-         [ 2, 8, 3, 8],
-         [ 7, 5, 10, 11, 2],
-         [ 8, 4, 1, 5, 11, 13],
-         [ 4, 7, 12, 8, 10, 1, 5],
-         [13, 9, 14, 15, 7, 8, 4, 6],
-         [12, 10, 11, 15, 2, 5, 7, 3, 1]]
-    matrix = Orange.core.SymMatrix(m)
-    root = hierarchical.HierarchicalClustering(matrix,
-            linkage=hierarchical.HierarchicalClustering.Average)
+
+.. literalinclude:: code/hierarchical-example.py
+   :lines: 1-14

 Root is a root of the cluster hierarchy. We can print using a
 simple recursive function.
-::
-
-    def printClustering(cluster):
-        if cluster.branches:
-            return "(%s%s)" % (printClustering(cluster.left), printClustering(cluster.right))
-        else:
-            return str(cluster[0])
+
+.. literalinclude:: code/hierarchical-example.py
+   :lines: 16-20

 The output is not exactly nice, but it will have to do. Our clustering,
     
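For readers who don't have code/hierarchical-example.py at hand, the inline snippets removed above can be collected into one runnable script. This is a sketch assembled from the old documentation text, not necessarily the exact content of the new example file:

    import Orange
    from Orange.clustering import hierarchical

    # Lower-triangular distance matrix over ten items (indices 0-9).
    m = [[],
         [ 3],
         [ 2, 4],
         [17, 5, 4],
         [ 2, 8, 3, 8],
         [ 7, 5, 10, 11, 2],
         [ 8, 4, 1, 5, 11, 13],
         [ 4, 7, 12, 8, 10, 1, 5],
         [13, 9, 14, 15, 7, 8, 4, 6],
         [12, 10, 11, 15, 2, 5, 7, 3, 1]]
    matrix = Orange.core.SymMatrix(m)

    # Average-linkage hierarchical clustering on the distance matrix.
    root = hierarchical.HierarchicalClustering(matrix,
            linkage=hierarchical.HierarchicalClustering.Average)

    def printClustering(cluster):
        # Recursively render the tree as nested parentheses of leaf indices.
        if cluster.branches:
            return "(%s%s)" % (printClustering(cluster.left), printClustering(cluster.right))
        else:
            return str(cluster[0])

    print printClustering(root)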
 supposedly the only) element of cluster, cluster[0], we shall print
 it out as a tuple.
-::
-
-    def printClustering2(cluster):
-        if cluster.branches:
-            return "(%s%s)" % (printClustering2(cluster.left), printClustering2(cluster.right))
-        else:
-            return str(tuple(cluster))
+
+.. literalinclude:: code/hierarchical-example.py
+   :lines: 22-26

 The distance matrix could have been given a list of objects. We could,
 for instance, put
-::
-
-    matrix.objects = ["Ann", "Bob", "Curt", "Danny", "Eve",
-                      "Fred", "Greg", "Hue", "Ivy", "Jon"]
+
+.. literalinclude:: code/hierarchical-example.py
+   :lines: 28-29

 above calling the HierarchicalClustering.
     
 If we've forgotten to store the objects into matrix prior to clustering,
 nothing is lost. We can add it into clustering later, by
-::
-
-    root.mapping.objects = ["Ann", "Bob", "Curt", "Danny", "Eve", "Fred", "Greg", "Hue", "Ivy", "Jon"]
+
+.. literalinclude:: code/hierarchical-example.py
+   :lines: 31

 So, what do these "objects" do? Call printClustering(root) again and you'll
     
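The labelling options discussed above can likewise be sketched as a continuation of the previous script; the printClustering2 helper and the name list come from the removed lines, while the final print call is an assumption about how the example is meant to be used:

    def printClustering2(cluster):
        # Like printClustering, but prints every leaf as a tuple of its elements.
        if cluster.branches:
            return "(%s%s)" % (printClustering2(cluster.left), printClustering2(cluster.right))
        else:
            return str(tuple(cluster))

    # Labels could have been attached before clustering with
    #     matrix.objects = ["Ann", "Bob", ...]
    # but they can also be attached to an existing clustering:
    root.mapping.objects = ["Ann", "Bob", "Curt", "Danny", "Eve",
                            "Fred", "Greg", "Hue", "Ivy", "Jon"]

    print printClustering(root)   # leaves now print as names instead of indices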
 of ``root.left`` and ``root.right``.

-Let us write function for cluster pruning. ::
-
-    def prune(cluster, togo):
-        if cluster.branches:
-            if togo<0:
-                cluster.branches = None
-            else:
-                for branch in cluster.branches:
-                    prune(branch, togo-cluster.height)
+Let us write function for cluster pruning.
+
+.. literalinclude:: code/hierarchical-example.py
+   :lines: 33-39

 We shall use ``printClustering2`` here, since we can have multiple elements
     
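A sketch of the pruning helper described above, copied from the removed lines, with a usage call appended; the cutoff value 9 is an arbitrary choice for illustration, not taken from the changeset:

    def prune(cluster, togo):
        # Cut the tree: once the height budget is spent, drop a cluster's subtrees.
        if cluster.branches:
            if togo < 0:
                cluster.branches = None
            else:
                for branch in cluster.branches:
                    prune(branch, togo - cluster.height)

    prune(root, 9)                 # arbitrary cutoff chosen for illustration
    print printClustering2(root)   # pruned leaves may now hold several elements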

 We've ended up with four clusters. Need a list of clusters?
-Here's the function. ::
-
-    def listOfClusters0(cluster, alist):
-        if not cluster.branches:
-            alist.append(list(cluster))
-        else:
-            for branch in cluster.branches:
-                listOfClusters0(branch, alist)
-
-    def listOfClusters(root):
-        l = []
-        listOfClusters0(root, l)
-        return l
+Here's the function.
+
+.. literalinclude:: code/hierarchical-example.py
+   :lines: 41-51

 The function returns a list of lists, in our case
     
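The list-of-clusters helpers removed above, again gathered into a runnable form; the final print line is an added illustration:

    def listOfClusters0(cluster, alist):
        # Depth-first walk that appends every leaf cluster to alist.
        if not cluster.branches:
            alist.append(list(cluster))
        else:
            for branch in cluster.branches:
                listOfClusters0(branch, alist)

    def listOfClusters(root):
        # Return the leaves of the (possibly pruned) tree as a list of lists.
        l = []
        listOfClusters0(root, l)
        return l

    print listOfClusters(root)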
 and cluster it with average linkage. Since we don't need the matrix,
 we shall let the clustering overwrite it (not that it's needed for
-such a small data set as Iris). ::
-
-    import Orange
-    from Orange.clustering import hierarchical
-
-    data = Orange.data.Table("iris")
-    matrix = Orange.core.SymMatrix(len(data))
-    matrix.setattr("objects", data)
-    distance = Orange.distance.Euclidean(data)
-    for i1, instance1 in enumerate(data):
-        for i2 in range(i1+1, len(data)):
-            matrix[i1, i2] = distance(instance1, data[i2])
-
-    clustering = hierarchical.HierarchicalClustering()
-    clustering.linkage = clustering.Average
-    clustering.overwrite_matrix = 1
-    root = clustering(matrix)
+such a small data set as Iris).
+
+.. literalinclude:: code/hierarchical-example-2.py
+   :lines: 1-15

 Note that we haven't forgotten to set the ``matrix.objects``. We did it
 through ``matrix.setattr`` to avoid the warning. Let us now prune the
 clustering using the function we've written above, and print out the
-clusters. ::
-
-    prune(root, 1.4)
-    for n, cluster in enumerate(listOfClusters(root)):
-        print "\n\n Cluster %i \n" % n
-        for instance in cluster:
-            print instance
+clusters.
+
+.. literalinclude:: code/hierarchical-example-2.py
+   :lines: 16-20

 Since the printout is pretty long, it might be more informative to just
-print out the class distributions for each cluster. ::
-
-    for cluster in listOfClusters(root):
-        dist = Orange.core.get_class_distribution(cluster)
-        for e, d in enumerate(dist):
-            print "%s: %3.0f " % (data.domain.class_var.values[e], d),
-        print
+print out the class distributions for each cluster.
+
+.. literalinclude:: code/hierarchical-example-2.py
+   :lines: 22-26

 Here's what it shows. ::
     
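The removed Iris example can similarly be collected into a single script. This reconstruction follows the old inline code and assumes the prune and listOfClusters helpers from the sketches above are defined in the same file; it need not match the new code/hierarchical-example-2.py line for line:

    import Orange
    from Orange.clustering import hierarchical

    data = Orange.data.Table("iris")

    # Symmetric matrix of pairwise Euclidean distances between instances;
    # setattr is used so that assigning "objects" does not trigger a warning.
    matrix = Orange.core.SymMatrix(len(data))
    matrix.setattr("objects", data)
    distance = Orange.distance.Euclidean(data)
    for i1, instance1 in enumerate(data):
        for i2 in range(i1 + 1, len(data)):
            matrix[i1, i2] = distance(instance1, data[i2])

    clustering = hierarchical.HierarchicalClustering()
    clustering.linkage = clustering.Average
    clustering.overwrite_matrix = 1    # the matrix may be reused during clustering
    root = clustering(matrix)

    # Prune the tree and print the class distribution of each remaining cluster
    # (prune and listOfClusters are the helpers defined earlier).
    prune(root, 1.4)
    for cluster in listOfClusters(root):
        dist = Orange.core.get_class_distribution(cluster)
        for e, d in enumerate(dist):
            print "%s: %3.0f " % (data.domain.class_var.values[e], d),
        print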
 instance, call a learning algorithms, passing a cluster as an argument.
 It won't mind. If you, however, want to have a list of table, you can
-easily convert the list by ::
-
-    tables = [Orange.data.Table(cluster) for cluster in listOfClusters(root)]
+easily convert the list by
+
+.. literalinclude:: code/hierarchical-example-2.py
+   :lines: 28

 Finally, if you are dealing with examples, you may want to take the function
     
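A small illustration of the conversion mentioned above; the loop that prints the table sizes is an added assumption, not part of the original example:

    # Turn each cluster (a list of instances) into a standalone data table.
    tables = [Orange.data.Table(cluster) for cluster in listOfClusters(root)]
    for i, table in enumerate(tables):
        print "cluster %i: %i instances" % (i, len(table))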
     """
     distance = distance_constructor(data)
-    matrix = orange.SymMatrix(len(data))
+    matrix = Orange.misc.SymMatrix(len(data))
     for i in range(len(data)):
         for j in range(i+1):
     

     """
-    matrix = orange.SymMatrix(len(data.domain.attributes))
+    matrix = Orange.misc.SymMatrix(len(data.domain.attributes))
     for a1 in range(len(data.domain.attributes)):
         for a2 in range(a1):
     
     :type tree: :class:`HierarchicalCluster`
     :param matrix: SymMatrix that was used to compute the clustering.
-    :type matrix: :class:`Orange.core.SymMatrix`
+    :type matrix: :class:`Orange.misc.SymMatrix`
     :param progress_callback: Function used to report on progress.
     :type progress_callback: function
     
     :type tree: :class:`HierarchicalCluster`
     :param matrix: SymMatrix that was used to compute the clustering.
-    :type matrix: :class:`Orange.core.SymMatrix`
+    :type matrix: :class:`Orange.misc.SymMatrix`
     :param progress_callback: Function used to report on progress.
     :type progress_callback: function
     

 def feature_distance_matrix(data, distance=None, progress_callback=None):
-    """ A helper function that computes an :class:`Orange.core.SymMatrix` of
+    """ A helper function that computes an :class:`Orange.misc.SymMatrix` of
     all pairwise distances between features in `data`.

     
     :type progress_callback: function

-    :rtype: :class:`Orange.core.SymMatrix`
+    :rtype: :class:`Orange.misc.SymMatrix`

     """
     attributes = data.domain.attributes
-    matrix = orange.SymMatrix(len(attributes))
+    matrix = Orange.misc.SymMatrix(len(attributes))
     iter_count = matrix.dim * (matrix.dim - 1) / 2
     milestones = progress_bar_milestones(iter_count, 100)
     
     :type cluster: :class:`HierarchicalCluster`

-    :rtype: :class:`Orange.core.SymMatrix`
+    :rtype: :class:`Orange.misc.SymMatrix`

     """

     mapping = cluster.mapping
-    matrix = Orange.core.SymMatrix(len(mapping))
+    matrix = Orange.misc.SymMatrix(len(mapping))
     for cluster in postorder(cluster):
         if cluster.branches:
     


-if __name__=="__main__":
-    data = orange.ExampleTable("doc//datasets//brown-selected.tab")
-#    data = orange.ExampleTable("doc//datasets//iris.tab")
-    root = hierarchicalClustering(data, order=True) #, linkage=orange.HierarchicalClustering.Single)
-    attr_root = hierarchicalClustering_attributes(data, order=True)
-#    print root
-#    d = DendrogramPlotPylab(root, data=data, labels=[str(ex.getclass()) for ex in data], dendrogram_width=0.4, heatmap_width=0.3,  params={}, cmap=None)
-#    d.plot(show=True, filename="graph.png")
-
-    dendrogram_draw("graph.eps", root, attr_tree=attr_root, data=data, labels=[str(e.getclass()) for e in data], tree_height=50, #width=500, height=500,
-                          cluster_colors={root.right:(255,0,0), root.right.right:(0,255,0)},
-                          color_palette=ColorPalette([(255, 0, 0), (0,0,0), (0, 255,0)], gamma=0.5,
-                                                     overflow=(255, 255, 255), underflow=(255, 255, 255))) #, minv=-0.5, maxv=0.5)