Changeset 10046:207f641d0485 in orange


Ignore:
Timestamp:
02/08/12 09:59:34 (2 years ago)
Author:
markotoplak
Branch:
default
rebase_source:
f9b5f7b1eeb6cfd4edec41d813e96e62b69f93dd
Message:

Shortened hierarchical clustering documentation. Some data.variable -> feature.

Location:
Orange
Files:
11 edited

Legend:

Unmodified
Added
Removed
  • Orange/OrangeWidgets/Data/OWEditDomain.py

    r9671 r10046  
    1616 
    1717def is_discrete(var): 
    18     return isinstance(var, Orange.data.variable.Discrete) 
     18    return isinstance(var, Orange.feature.Discrete) 
    1919 
    2020def is_continuous(var): 
    21     return isinstance(var, Orange.data.variable.Continuous) 
     21    return isinstance(var, Orange.feature.Continuous) 
    2222 
    2323def get_qualified(module, name): 
  • Orange/OrangeWidgets/Evaluate/OWReliability.py

    r9671 r10046  
    191191                    self.train_data_has_no_class = True 
    192192                elif not isinstance(data.domain.class_var, 
    193                                     Orange.data.variable.Continuous): 
     193                                    Orange.feature.Continuous): 
    194194                    self.train_data_has_discrete_class = True 
    195195                     
  • Orange/OrangeWidgets/OWItemModels.py

    r9671 r10046  
    313313         
    314314    def variable_tooltip(self, var): 
    315         if isinstance(var, Orange.data.variable.Discrete): 
     315        if isinstance(var, Orange.feature.Discrete): 
    316316            return self.discrete_variable_tooltip(var) 
    317         elif isinstance(var, Orange.data.variable.Continuous): 
     317        elif isinstance(var, Orange.feature.Continuous): 
    318318            return self.continuous_variable_toltip(var) 
    319         elif isinstance(var, Orange.data.variable.String): 
     319        elif isinstance(var, Orange.feature.String): 
    320320            return self.string_variable_tooltip(var) 
    321321         
  • Orange/OrangeWidgets/OWNxHist.py

    r9671 r10046  
    185185                else: 
    186186                    data = [[str(x)] for x in self.matrix.items] 
    187                     items = Orange.data.Table(Orange.data.Domain(Orange.data.variable.String('label'), 0), data) 
     187                    items = Orange.data.Table(Orange.data.Domain(Orange.feature.String('label'), 0), data) 
    188188                    graph.set_items(items) 
    189189 
  • Orange/OrangeWidgets/Prototypes/OWGaussianMixtures.py

    r9671 r10046  
    8383        vars = [] 
    8484        for i, w in enumerate(self.gmm.weights): 
    85             var = Orange.data.variable.Continuous("Cluster {0}".format(i)) 
     85            var = Orange.feature.Continuous("Cluster {0}".format(i)) 
    8686            var.attributes["weight"] = str(w) 
    8787            vars.append(var) 
  • Orange/OrangeWidgets/Unsupervised/OWNxAnalysis.py

    r9671 r10046  
    263263                if job.type == NODELEVEL: 
    264264                    self.analfeatures.append((job.name, \ 
    265                                 Orange.data.variable.Continuous(job.label))) 
     265                                Orange.feature.Continuous(job.label))) 
    266266                    setattr(self, "lbl_" + job.name, "  finished") 
    267267                     
     
    299299            elif job.result is not None: 
    300300                if job.type == NODELEVEL: 
    301                     self.analfeatures.append((job.name, Orange.data.variable.Continuous(job.label))) 
     301                    self.analfeatures.append((job.name, Orange.feature.Continuous(job.label))) 
    302302                    self.analdata[job.name] = [job.result[node] for node in sorted(job.result.iterkeys())] 
    303303                     
  • Orange/OrangeWidgets/Unsupervised/OWNxExplorer.py

    r9671 r10046  
    14021402                keyword_table = self.graph_base.items() 
    14031403            else: 
    1404                 keyword_table = Orange.data.Table(Orange.data.Domain(Orange.data.variable.String('component name')), [[''] for i in range(len(self.graph_base.items()))]) 
     1404                keyword_table = Orange.data.Table(Orange.data.Domain(Orange.feature.String('component name')), [[''] for i in range(len(self.graph_base.items()))]) 
    14051405 
    14061406            import obiGO 
  • Orange/OrangeWidgets/Unsupervised/OWNxExplorerQwt.py

    r9671 r10046  
    644644            keyword_table = self.graph_base.items() 
    645645        else: 
    646             keyword_table = Orange.data.Table(Orange.data.Domain(Orange.data.variable.String('component name')), [[''] for i in range(len(self.graph_base.items()))]) 
     646            keyword_table = Orange.data.Table(Orange.data.Domain(Orange.feature.String('component name')), [[''] for i in range(len(self.graph_base.items()))]) 
    647647 
    648648        import obiGO 
     
    773773 
    774774        components = Orange.network.nx.algorithms.components.connected_components(self.graph) 
    775         keyword_table = Orange.data.Table(Orange.data.Domain(Orange.data.variables.String('component name')), [[''] for i in range(len(self.graph_base.items()))]) 
     775        keyword_table = Orange.data.Table(Orange.data.Domain(Orange.feature.String('component name')), [[''] for i in range(len(self.graph_base.items()))]) 
    776776 
    777777        excludeWord = ["AND", "OF", "KEGG", "ST", "IN", "SIG"] 
  • Orange/OrangeWidgets/Visualize Qt/OWNxExplorerQt.py

    r9671 r10046  
    480480            keyword_table = self.graph_base.items() 
    481481        else: 
    482             keyword_table = Orange.data.Table(Orange.data.Domain(Orange.data.variable.String('component name')), [[''] for i in range(len(self.graph_base.items()))])  
     482            keyword_table = Orange.data.Table(Orange.data.Domain(Orange.feature.String('component name')), [[''] for i in range(len(self.graph_base.items()))])  
    483483             
    484484        import obiGO  
  • Orange/clustering/hierarchical.py

    r9906 r10046  
    88.. index:: agglomerative clustering 
    99 
    10 The method for hierarchical clustering, encapsulated in class 
    11 :class:`HierarchicalClustering` works on a distance matrix stored as 
    12 :class:`SymMatrix`. The method works in approximately O(n^2) time (with 
    13 the worst case O(n^3)). For orientation, clustering ten thousand  
    14 elements should take roughly 15 seconds on a 2 GHz computer.  
    15 The algorithm can either make a copy of the distances matrix and work on  
    16 it, or work on the original distance matrix, destroying it in the process.  
    17 The latter is useful for clustering larger number of objects. Since the  
    18 distance matrix stores (n+1)(n+2)/2 floats (cca 2 MB for 1000 objects and  
    19 200 MB for 10000, assuming a float takes 4 bytes), by copying it we  
    20 would quickly run out of physical memory. Using virtual memory is not  
    21 an option since the matrix is accessed in a random manner. 
    22  
    23 The distance should contain no negative elements. This limitation is 
    24 due to implementation details of the algorithm (it is not absolutely  
    25 necessary and can be lifted in future versions if often requested; it  
    26 only helps the algorithm run a bit faster). The elements on the diagonal  
    27 (representing the element's distance from itself) are ignored. 
    28  
    29 Distance matrix can have the attribute objects describing the objects we  
    30 are clustering (this is available only in Python). This can be any sequence  
    31 of the same length as the matrix - an ExampleTable, a list of examples, a  
    32 list of attributes (if you're clustering attributes), or even a string of  
    33 the correct length. This attribute is not used in clustering but is only  
    34 passed to the clusters' attribute ``mapping`` (see below), which will hold a  
    35 reference to it (if you modify the list, the changes will affect the  
    36 clusters as well). 
     10For hierarchical clustering we need to compute distances between 
     11instances. The method works in approximately O(n^2) time (with the worst 
     12case O(n^3)).   
     13 
     14The distance matrix should contain no negative elements. This limitation is due 
     15to implementation details of the algorithm and helps the algorithm 
     16run a bit faster. The elements on the diagonal (representing the element's 
     17distance from itself) are ignored. 
     18 
     19Basic functionality 
     20------------------- 
     21 
     22.. autofunction:: clustering 
    3723 
    3824.. class:: HierarchicalClustering 
     
    5743    .. attribute:: overwrite_matrix 
    5844 
    59         If true (default is false), the algorithm will work on the original 
    60         distance matrix, destroying it in the process. The benefit is that it 
    61         will need much less memory (not much more than what is needed to store 
    62         the tree of clusters). 
    63          
     45        If True (default is False), the algorithm will save memory 
     46        by working on the original distance matrix, destroying it in 
     47        the process. 
     48 
    6449    .. attribute:: progress_callback 
    6550         
    66         A callback function (None by default). It can be any function or 
    67         callable class in Python, which accepts a single float as an 
    68         argument. The function only gets called if the number of objects 
    69         being clustered is at least 1000. It will be called for 101 times, 
    70         and the argument will give the proportion of the work been done. 
    71         The time intervals between the function calls won't be equal (sorry 
    72         about that...) since the clustering proceeds faster as the number 
    73         of clusters decreases. 
     51        A callback function (None by default), which will be called 101 times. 
     52        The function only gets called if the number of objects is at least 1000.  
    7453         
    7554    .. method:: __call__(matrix) 
     
    7857        argument. It returns an instance of HierarchicalCluster representing 
    7958        the root of the hierarchy (instance of :class:`HierarchicalCluster`). 
    80         See examples section for details. 
    8159         
    8260        :param matrix: A distance matrix to perform the clustering on. 
     
    9775     
    9876        The left sub-cluster (defined only if there are only two branches). 
    99          
    100         .. note:: Same as ``branches[0]`` 
     77        Same as ``branches[0]``. 
    10178         
    10279    .. attribute:: right 
    10380     
    10481        The right sub-cluster (defined only if there are only two branches). 
    105          
    106         .. note:: Same as ``branches[1]`` 
     82        Same as ``branches[1]``. 
    10783         
    10884    .. attribute:: height 
     
    11591        for all clusters in the hierarchy - it simply represents the indices 
    11692        ordered according to the clustering. 
    117          
     93     
     94    .. attribute:: mapping.objects 
     95 
     96        A sequence describing objects - an :obj:`Orange.data.Table`, a 
     97        list of instances, a list of features (when clustering features), 
     98        or even a string of the same length as the number of elements. 
     99 
    118100    .. attribute:: first 
    119101    .. attribute:: last 
    120102     
    121103        ``first`` and ``last`` are indices into the elements of ``mapping`` that 
    122         belong to that cluster. (Seems weird, but is trivial - wait for the 
    123         examples. On the other hand, you probably won't need to understand this 
    124         anyway). 
     104        belong to that cluster. 
    125105 
    126106    .. method:: __len__() 
     
    167147    :lines: 16-20 
    168148             
    169 The output is not exactly nice, but it will have to do. Our clustering, 
    170 printed by calling printClustering(root) looks like this  
     149Our clustering, 
     150printed by calling printClustering(root), looks like  
    171151:: 
    172152     
     
    186166    0 4 5 7 8 9  
    187167     
    188 Everything that can be iterated over, can as well be cast into a list or 
    189 tuple. Let us demonstrate this by writing a better function for printing 
    190 out the clustering (which will also come handy for something else in a 
    191 while). The one above supposed that each leaf contains a single object. 
    192 This is not necessarily so; instead of printing out the first (and 
     168Let us write a better function for printing 
     169out the clustering: instead of printing out the first (and 
    193170supposedly the only) element of cluster, cluster[0], we shall print 
    194171it out as a tuple.  
     
    197174    :lines: 22-26 
    198175             
    199 The distance matrix could have been given a list of objects. We could, 
    200 for instance, put 
    201      
    202 .. literalinclude:: code/hierarchical-example.py 
    203     :lines: 28-29 
    204  
    205 above calling the HierarchicalClustering. 
    206  
    207 .. note:: This code will actually trigger a warning; 
    208     to avoid it, use matrix.setattr("objects", ["Ann", "Bob".... 
    209 ..    Why this is needed is explained in the page on `Orange peculiarities`_. TODO: Create page on Orange Peculiarities. 
    210  
    211 If we've forgotten to store the objects into matrix prior to clustering, 
    212 nothing is lost. We can add it into clustering later, by 
     176We can add object description into clustering by 
    213177 
    214178.. literalinclude:: code/hierarchical-example.py 
    215179    :lines: 31 
    216180     
    217 So, what do these "objects" do? Call printClustering(root) again and you'll 
    218 see. Or, let us print out the elements of the first left cluster, as we did 
    219 before.  
    220 :: 
     181As before, let us print out the elements of the first left cluster:: 
    221182 
    222183    >>> for el in root.left: 
     
    320281However, instead of list of lists, it will return a list of tables. 
    321282 
    322 How the data in ``HierarchicalCluster`` is really stored? 
     283Exploring hierarchical clusters 
    323284--------------------------------------------------------- 
    324285 
     
    353314``objects``. 
    354315 
    355  
    356316Subclusters are ordered so that ``cluster.left.last`` always equals 
    357317``cluster.right.first`` or, in general, ``cluster.branches[i].last`` 
    358318equals ``cluster.branches[i+1].first``. 
    359319 
    360  
    361 Swapping and permutation do three things: change the order of elements in 
    362 ``branches``, permute the corresponding regions in ``mapping`` and adjust 
    363 the ``first`` and ``last`` for all the clusters below. For the latter, when 
    364 subclusters of cluster are permuted, the entire subtree starting at 
    365 ``cluster.branches[i]`` is moved by the same offset. 
    366  
     320Swapping and permutation do three things: change the order of 
     321elements in ``branches``, permute the corresponding regions in 
     322:obj:`~HierarchicalCluster.mapping` and adjust the ``first`` and ``last`` 
     323for all the clusters below. For the latter, when subclusters of cluster 
     324are permuted, the entire subtree starting at ``cluster.branches[i]`` 
     325is moved by the same offset. 
    367326 
    368327The hierarchy of objects that represent a clustering is open, everything is 
     
    381340----------------- 
    382341 
    383 .. autofunction:: clustering 
    384342.. autofunction:: clustering_features 
    385343.. autofunction:: cluster_to_list 
     
    466424    :param data: Input data table for clustering. 
    467425    :type data: :class:`Orange.data.Table` 
    468     :param distance: Attribute distance constructor  
    469         .. note:: currently not used. 
     426    :param distance: Attribute distance constructor  (currently not used). 
    470427    :param linkage: Linkage flag. Must be one of global module level flags: 
    471428     
  • Orange/data/discretization.py

    r9943 r10046  
    1313 
    1414    :param features: data features to discretize. None (default) to discretize all features. 
    15     :type features: list of :class:`Orange.data.variable.Variable` 
     15    :type features: list of :class:`Orange.feature.Descriptor` 
    1616 
    1717    :param method: feature discretization method. 
Note: See TracChangeset for help on using the changeset viewer.