# Changes in [9907:858937f876b0:9908:6864339fd119] in orange

Ignore:
Files:
5 deleted
27 edited

Unmodified
Removed
• ## .hgignore

 r9881 source/orangeom/lib_vectors_auto.txt # Ignore build and dist dir, created by setup.py build or setup.py bdist_* . # Ignore files created by setup.py. build dist MANIFEST Orange.egg-info # Ignore dot files. docs/reference/html # Images generated by tests. # Files generated by tests. Orange/testing/regression/*/*.changed.txt Orange/testing/regression/*/*.crash.txt Orange/testing/regression/*/*.new.txt Orange/doc/modules/*.png docs/reference/rst/code/*.png Orange/doc/modules/tree1.dot Orange/doc/reference/del2.tab Orange/doc/reference/undefined-saved-dc-dk.tab Orange/doc/reference/undefined-saved-na.tab Orange/testing/regression/results_orange25/unusedValues.py.txt docs/reference/rst/code/iris.testsave.arff docs/tutorial/rst/code/adult_sample_sampled.tab docs/tutorial/rst/code/tree.dot
• ## Orange/clustering/hierarchical.py

 r9752 :param matrix: A distance matrix to perform the clustering on. :type matrix: :class:`Orange.core.SymMatrix` :type matrix: :class:`Orange.misc.SymMatrix` Let us construct a simple distance matrix and run clustering on it. :: import Orange from Orange.clustering import hierarchical m = [[], [ 3], [ 2, 4], [17, 5, 4], [ 2, 8, 3, 8], [ 7, 5, 10, 11, 2], [ 8, 4, 1, 5, 11, 13], [ 4, 7, 12, 8, 10, 1, 5], [13, 9, 14, 15, 7, 8, 4, 6], [12, 10, 11, 15, 2, 5, 7, 3, 1]] matrix = Orange.core.SymMatrix(m) root = hierarchical.HierarchicalClustering(matrix, linkage=hierarchical.HierarchicalClustering.Average) .. literalinclude:: code/hierarchical-example.py :lines: 1-14 Root is a root of the cluster hierarchy. We can print using a simple recursive function. :: def printClustering(cluster): if cluster.branches: return "(%s%s)" % (printClustering(cluster.left), printClustering(cluster.right)) else: return str(cluster[0]) .. literalinclude:: code/hierarchical-example.py :lines: 16-20 The output is not exactly nice, but it will have to do. Our clustering, supposedly the only) element of cluster, cluster[0], we shall print it out as a tuple. :: def printClustering2(cluster): if cluster.branches: return "(%s%s)" % (printClustering2(cluster.left), printClustering2(cluster.right)) else: return str(tuple(cluster)) .. literalinclude:: code/hierarchical-example.py :lines: 22-26 The distance matrix could have been given a list of objects. We could, for instance, put :: matrix.objects = ["Ann", "Bob", "Curt", "Danny", "Eve", "Fred", "Greg", "Hue", "Ivy", "Jon"] .. literalinclude:: code/hierarchical-example.py :lines: 28-29 above calling the HierarchicalClustering. If we've forgotten to store the objects into matrix prior to clustering, nothing is lost. We can add it into clustering later, by :: root.mapping.objects = ["Ann", "Bob", "Curt", "Danny", "Eve", "Fred", "Greg", "Hue", "Ivy", "Jon"] .. literalinclude:: code/hierarchical-example.py :lines: 31 So, what do these "objects" do? 
Call printClustering(root) again and you'll of ``root.left`` and ``root.right``. Let us write function for cluster pruning. :: def prune(cluster, togo): if cluster.branches: if togo<0: cluster.branches = None else: for branch in cluster.branches: prune(branch, togo-cluster.height) Let us write function for cluster pruning. .. literalinclude:: code/hierarchical-example.py :lines: 33-39 We shall use ``printClustering2`` here, since we can have multiple elements We've ended up with four clusters. Need a list of clusters? Here's the function. :: def listOfClusters0(cluster, alist): if not cluster.branches: alist.append(list(cluster)) else: for branch in cluster.branches: listOfClusters0(branch, alist) def listOfClusters(root): l = [] listOfClusters0(root, l) return l Here's the function. .. literalinclude:: code/hierarchical-example.py :lines: 41-51 The function returns a list of lists, in our case and cluster it with average linkage. Since we don't need the matrix, we shall let the clustering overwrite it (not that it's needed for such a small data set as Iris). :: import Orange from Orange.clustering import hierarchical data = Orange.data.Table("iris") matrix = Orange.core.SymMatrix(len(data)) matrix.setattr("objects", data) distance = Orange.distance.Euclidean(data) for i1, instance1 in enumerate(data): for i2 in range(i1+1, len(data)): matrix[i1, i2] = distance(instance1, data[i2]) clustering = hierarchical.HierarchicalClustering() clustering.linkage = clustering.Average clustering.overwrite_matrix = 1 root = clustering(matrix) such a small data set as Iris). .. literalinclude:: code/hierarchical-example-2.py :lines: 1-15 Note that we haven't forgotten to set the ``matrix.objects``. We did it through ``matrix.setattr`` to avoid the warning. Let us now prune the clustering using the function we've written above, and print out the clusters. 
 :: prune(root, 1.4) for n, cluster in enumerate(listOfClusters(root)): print "\n\n Cluster %i \n" % n for instance in cluster: print instance clusters. .. literalinclude:: code/hierarchical-example-2.py :lines: 16-20 Since the printout is pretty long, it might be more informative to just print out the class distributions for each cluster. :: for cluster in listOfClusters(root): dist = Orange.core.get_class_distribution(cluster) for e, d in enumerate(dist): print "%s: %3.0f " % (data.domain.class_var.values[e], d), print print out the class distributions for each cluster. .. literalinclude:: code/hierarchical-example-2.py :lines: 22-26 Here's what it shows. :: instance, call a learning algorithm, passing a cluster as an argument. It won't mind. If you, however, want to have a list of tables, you can easily convert the list by :: tables = [Orange.data.Table(cluster) for cluster in listOfClusters(root)] easily convert the list by .. literalinclude:: code/hierarchical-example-2.py :lines: 28 Finally, if you are dealing with examples, you may want to take the function """ distance = distance_constructor(data) matrix = orange.SymMatrix(len(data)) matrix = Orange.misc.SymMatrix(len(data)) for i in range(len(data)): for j in range(i+1): """ matrix = orange.SymMatrix(len(data.domain.attributes)) matrix = Orange.misc.SymMatrix(len(data.domain.attributes)) for a1 in range(len(data.domain.attributes)): for a2 in range(a1): :type tree: :class:`HierarchicalCluster` :param matrix: SymMatrix that was used to compute the clustering. :type matrix: :class:`Orange.core.SymMatrix` :type matrix: :class:`Orange.misc.SymMatrix` :param progress_callback: Function used to report on progress. :type progress_callback: function :type tree: :class:`HierarchicalCluster` :param matrix: SymMatrix that was used to compute the clustering. :type matrix: :class:`Orange.core.SymMatrix` :type matrix: :class:`Orange.misc.SymMatrix` :param progress_callback: Function used to report on progress. 
:type progress_callback: function def feature_distance_matrix(data, distance=None, progress_callback=None): """ A helper function that computes an :class:`Orange.core.SymMatrix` of """ A helper function that computes an :class:`Orange.misc.SymMatrix` of all pairwise distances between features in `data`. :type progress_callback: function :rtype: :class:`Orange.core.SymMatrix` :rtype: :class:`Orange.misc.SymMatrix` """ attributes = data.domain.attributes matrix = orange.SymMatrix(len(attributes)) matrix = Orange.misc.SymMatrix(len(attributes)) iter_count = matrix.dim * (matrix.dim - 1) / 2 milestones = progress_bar_milestones(iter_count, 100) :type cluster: :class:`HierarchicalCluster` :rtype: :class:`Orange.core.SymMatrix` :rtype: :class:`Orange.misc.SymMatrix` """ mapping = cluster.mapping matrix = Orange.core.SymMatrix(len(mapping)) matrix = Orange.misc.SymMatrix(len(mapping)) for cluster in postorder(cluster): if cluster.branches: if __name__=="__main__": data = orange.ExampleTable("doc//datasets//brown-selected.tab") #    data = orange.ExampleTable("doc//datasets//iris.tab") root = hierarchicalClustering(data, order=True) #, linkage=orange.HierarchicalClustering.Single) attr_root = hierarchicalClustering_attributes(data, order=True) #    print root #    d = DendrogramPlotPylab(root, data=data, labels=[str(ex.getclass()) for ex in data], dendrogram_width=0.4, heatmap_width=0.3,  params={}, cmap=None) #    d.plot(show=True, filename="graph.png") dendrogram_draw("graph.eps", root, attr_tree=attr_root, data=data, labels=[str(e.getclass()) for e in data], tree_height=50, #width=500, height=500, cluster_colors={root.right:(255,0,0), root.right.right:(0,255,0)}, color_palette=ColorPalette([(255, 0, 0), (0,0,0), (0, 255,0)], gamma=0.5, overflow=(255, 255, 255), underflow=(255, 255, 255))) #, minv=-0.5, maxv=0.5)
• ## Orange/data/__init__.py

 r9671 from orange import newmetaid as new_meta_id from orange import SymMatrix

• ## Orange/feature/__init__.py

 r9671 import imputation from Orange.core import Variable as Descriptor from Orange.core import EnumVariable as Discrete from Orange.core import FloatVariable as Continuous from Orange.core import PythonVariable as Python from Orange.core import StringVariable as String from Orange.core import VarList as Descriptors from Orange.core import newmetaid as new_meta_id from Orange.core import Variable as V make = V.make retrieve = V.get_existing MakeStatus = V.MakeStatus del V __docformat__ = 'restructuredtext'
• ## Orange/feature/discretization.py

 r9878 Discretization, \ Preprocessor_discretize def entropyDiscretization_wrapper(data):
• ## Orange/misc/__init__.py

 r9698 Module Orange.misc contains common functions and classes which are used in other modules. .. index: SymMatrix ----------------------- SymMatrix ----------------------- :obj:`SymMatrix` implements symmetric matrices of size fixed at construction time (and stored in :obj:`SymMatrix.dim`). .. class:: SymMatrix .. attribute:: dim Matrix dimension. .. attribute:: matrix_type Can be ``SymMatrix.Lower`` (0), ``SymMatrix.Upper`` (1), ``SymMatrix.Symmetric`` (2, default), ``SymMatrix.Lower_Filled`` (3) or ``SymMatrix.Upper_Filled`` (4). If the matrix type is ``Lower`` or ``Upper``, indexing above or below the diagonal, respectively, will fail. With ``Lower_Filled`` and ``Upper_Filled``, the elements upper or lower, respectively, still exist and are set to zero, but they cannot be modified. The default matrix type is ``Symmetric``, but can be changed at any time. If matrix type is ``Upper``, it is printed as: >>> m.matrix_type = m.Upper >>> print m (( 1.000,  2.000,  3.000,  4.000), (         4.000,  6.000,  8.000), (                 9.000, 12.000), (                        16.000)) Changing the type to ``Lower_Filled`` changes the printout to >>> m.matrix_type = m.Lower_Filled >>> print m (( 1.000,  0.000,  0.000,  0.000), ( 2.000,  4.000,  0.000,  0.000), ( 3.000,  6.000,  9.000,  0.000), ( 4.000,  8.000, 12.000, 16.000)) .. method:: __init__(dim[, default_value]) Construct a symmetric matrix of the given dimension. :param dim: matrix dimension :type dim: int :param default_value: default value (0 by default) :type default_value: double .. method:: __init__(instances) Construct a new symmetric matrix containing the given data instances. These can be given as Python list containing lists or tuples. 
 :param instances: data instances :type instances: list of lists The following example fills a matrix created above with data in a list:: import Orange m = [[], [ 3], [ 2, 4], [17, 5, 4], [ 2, 8, 3, 8], [ 7, 5, 10, 11, 2], [ 8, 4, 1, 5, 11, 13], [ 4, 7, 12, 8, 10, 1, 5], [13, 9, 14, 15, 7, 8, 4, 6], [12, 10, 11, 15, 2, 5, 7, 3, 1]] matrix = Orange.data.SymMatrix(m) SymMatrix also stores diagonal elements. They are set to zero, if they are not specified. The missing elements (shorter lists) are set to zero as well. If a list spreads over the diagonal, the constructor checks for asymmetries. For instance, the matrix :: m = [[], [ 3,  0, f], [ 2,  4]] is only OK if f equals 2. Finally, no row can be longer than matrix size. .. method:: get_values() Return all matrix values in a Python list. .. method:: get_KNN(i, k) Return k columns with the lowest value in the i-th row. :param i: i-th row :type i: int :param k: number of neighbors :type k: int .. method:: avg_linkage(clusters) Return a symmetric matrix with average distances between given clusters. :param clusters: list of clusters :type clusters: list of lists .. method:: invert(type) Invert values in the symmetric matrix. :param type: 0 (-X), 1 (1 - X), 2 (max - X), 3 (1 / X) :type type: int .. method:: normalize(type) Normalize values in the symmetric matrix. :param type: 0 (normalize to [0, 1] interval), 1 (Sigmoid) :type type: int ------------------- Indexing ------------------- For symmetric matrices the order of indices is not important: if ``m`` is a SymMatrix, then ``m[2, 4]`` addresses the same element as ``m[4, 2]``. .. literalinclude:: code/symmatrix.py :lines: 1-6 Although only the lower left half of the matrix was set explicitly, the whole matrix is constructed. >>> print m (( 1.000,  2.000,  3.000,  4.000), ( 2.000,  4.000,  6.000,  8.000), ( 3.000,  6.000,  9.000, 12.000), ( 4.000,  8.000, 12.000, 16.000)) Entire rows are indexed with a single index. 
They can be iterated over in a for loop or sliced (with, for example, ``m[:3]``): >>> print m[1] (3.0, 6.0, 9.0, 0.0) >>> m.matrix_type = m.Lower >>> for row in m: ...     print row (1.0,) (2.0, 4.0) (3.0, 6.0, 9.0) (4.0, 8.0, 12.0, 16.0) .. index: Random number generator from Orange.core import RandomGenerator as Random from orange import SymMatrix # addons is intentionally not imported; if it were, add-ons' directories would
• ## docs/reference/rst/Orange.classification.rst

 r9820 Orange.classification.svm Orange.classification.tree Orange.classification.random
• ## docs/reference/rst/Orange.data.rst

 r9372 .. toctree:: Orange.data.variable Orange.data.domain Orange.data.value Orange.data.sample Orange.data.formats Orange.data.symmatrix Orange.data.discretization

• ## docs/reference/rst/Orange.feature.discretization.rst

 r9863 value according to the rule found by discretization. In this respect, the discretization behaves similar to :class:`Orange.classification.Learner`. Utility functions ================= Some functions and classes that can be used for categorization of continuous features. Besides several general classes that can help in this task, we also provide a function that may help in entropy-based discretization (Fayyad & Irani), and a wrapper around classes for categorization that can be used for learning. .. autoclass:: Orange.feature.discretization.DiscretizedLearner_Class .. autoclass:: DiscretizeTable .. rubric:: Example FIXME. A chapter on `feature subset selection <../ofb/o_fss.htm>`_ in Orange for Beginners tutorial shows the use of DiscretizedLearner. Other discretization classes from core Orange are listed in chapter on `categorization <../ofb/o_categorization.htm>`_ of the same tutorial. Discretization Algorithms

• ## docs/reference/rst/Orange.feature.rst

 r9372 :maxdepth: 2 Orange.feature.descriptor Orange.feature.scoring Orange.feature.selection
• ## docs/reference/rst/code/distances-test.py

 r9823 # Euclidean distance constructor d2Constr = Orange.distance.instances.EuclideanConstructor() d2Constr = Orange.distance.Euclidean() d2 = d2Constr(iris) # Constructs dPears = Orange.distance.instances.PearsonRConstructor(iris) dPears = Orange.distance.PearsonR(iris) #reference instance
• ## docs/reference/rst/code/majority-classification.py

 r9823 res = Orange.evaluation.testing.cross_validation(learners, monks) CAs = Orange.evaluation.scoring.CA(res, reportSE=True) CAs = Orange.evaluation.scoring.CA(res, report_se=True) print "Tree:    %5.3f+-%5.3f" % CAs[0]

 r9823 # Construct a distance matrix using Euclidean distance dist = Orange.core.ExamplesDistanceConstructor_Euclidean(iris) matrix = Orange.core.SymMatrix(len(iris)) matrix = Orange.misc.SymMatrix(len(iris)) for i in range(len(iris)): for j in range(i+1):
• ## docs/reference/rst/code/mds-euclid-torgerson-3d.py

 r9866 # Construct a distance matrix using Euclidean distance dist = Orange.distance.Euclidean(iris) matrix = Orange.core.SymMatrix(len(iris)) matrix = Orange.misc.SymMatrix(len(iris)) matrix.setattr('items', iris) for i in range(len(iris)):
• ## docs/reference/rst/code/mds-scatterplot.py

 r9838 # Construct a distance matrix using Euclidean distance euclidean = Orange.distance.Euclidean(iris) distance = Orange.core.SymMatrix(len(iris)) distance = Orange.misc.SymMatrix(len(iris)) for i in range(len(iris)): for j in range(i + 1):
• ## docs/reference/rst/code/outlier2.py

 r9823 bridges = Orange.data.Table("bridges") outlier_det = Orange.preprocess.outliers.OutlierDetection() outlier_det.set_examples(bridges, Orange.distance.instances.EuclideanConstructor(bridges)) outlier_det.set_examples(bridges, Orange.distance.Euclidean(bridges)) outlier_det.set_knn(3) z_values = outlier_det.z_values()
• ## docs/reference/rst/code/svm-custom-kernel.py

 r9823 from Orange.classification.svm import SVMLearner, kernels from Orange.distance.instances import EuclideanConstructor from Orange.distance.instances import HammingConstructor from Orange.distance import Euclidean from Orange.distance import Hamming iris = data.Table("iris.tab") l1 = SVMLearner() l1.kernel_func = kernels.RBFKernelWrapper(EuclideanConstructor(iris), gamma=0.5) l1.kernel_func = kernels.RBFKernelWrapper(Euclidean(iris), gamma=0.5) l1.kernel_type = SVMLearner.Custom l1.probability = True l2 = SVMLearner() l2.kernel_func = kernels.RBFKernelWrapper(HammingConstructor(iris), gamma=0.5) l2.kernel_func = kernels.RBFKernelWrapper(Hamming(iris), gamma=0.5) l2.kernel_type = SVMLearner.Custom l2.probability = True l3 = SVMLearner() l3.kernel_func = kernels.CompositeKernelWrapper( kernels.RBFKernelWrapper(EuclideanConstructor(iris), gamma=0.5), kernels.RBFKernelWrapper(HammingConstructor(iris), gamma=0.5), l=0.5) kernels.RBFKernelWrapper(Euclidean(iris), gamma=0.5), kernels.RBFKernelWrapper(Hamming(iris), gamma=0.5), l=0.5) l3.kernel_type = SVMLearner.Custom l3.probability = True
• ## docs/reference/rst/code/symmatrix.py

 r9823 import Orange m = Orange.data.SymMatrix(4) m = Orange.misc.SymMatrix(4) for i in range(4): for j in range(i+1):
• ## docs/reference/rst/code/testing-test.py

 r9823 def printResults(res): CAs = Orange.evaluation.scoring.CA(res, reportSE=1) CAs = Orange.evaluation.scoring.CA(res, report_se=1) for name, ca in zip(res.classifierNames, CAs): print "%s: %5.3f+-%5.3f" % (name, ca[0], 1.96 * ca[1]),
• ## docs/reference/rst/code/variable-get_value_from.py

 r9823 # Category:    core # Uses:        monks-1 # Referenced:  Orange.data.variable # Classes:     Orange.data.variable.Discrete # Referenced:  Orange.feature # Classes:     Orange.feature.Discrete import Orange monks = Orange.data.Table("monks-1") e2 = Orange.data.variable.Discrete("e2", values=["not 1", "1"]) e2 = Orange.feature.Discrete("e2", values=["not 1", "1"]) e2.get_value_from = checkE print Orange.core.MeasureAttribute_info(e2, monks) print Orange.feature.scoring.InfoGain(e2, monks) dist = Orange.core.Distribution(e2, monks)
• ## docs/reference/rst/index.rst

 r9729 Orange.data Orange.feature Orange.associate Orange.evaluation Orange.feature Orange.multilabel
• ## setup.py

 r9879 install.run(self) # Create a .pth file wiht a path inside the Orange/orng directory # Create a .pth file with a path inside the Orange/orng directory # so the old modules are importable self.path_file, self.extra_dirs = ("orange-orng-modules", "Orange/orng")
• ## source/orange/_aliases.txt

 r9907 TransformValue sub_transformer subtransformer ImputerConstructor impute_class imputeClass
• ## source/orange/discretize.hpp

 r9863 __REGISTER_CLASS int maxNumberOfIntervals; //P maximal number of intervals; default = 0 (no limits) int maxNumberOfIntervals; //P(+n) maximal number of intervals; default = 0 (no limits) bool forceAttribute; //P minimal number of intervals; default = 0 (no limits)
Note: See TracChangeset for help on using the changeset viewer.