Changeset 7981:7262d5f0eb72 in orange
 Timestamp:
 06/03/11 16:10:02 (3 years ago)
 Branch:
 default
 Convert:
 99c55d19fe6bb87f4e1e211113d7e800a1695f79
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

orange/Orange/clustering/mixture.py
r7885 r7981 13 13 import sys, os 14 14 import numpy 15 import random 15 16 import Orange.data 16 17 … … 25 26 26 27 def __call__(self, instance): 27 """ Return the conditional probability of instance. 28 """ Return the probability of instance. 28 29 """ 29 30 return numpy.sum(prob_est([instance], self.weights, self.means, self.covariances)) 30 31 31 32 def __getitem__(self, index): 32 """ Return the indexth gaussian 33 """ Return the indexth gaussian. 33 34 """ 34 35 return GMModel([1.0], self.means[index: index + 1], self.covariances[index: index + 1]) 35 36 # def __getslice__(self, slice): 37 # pass 38 36 39 37 def __len__(self): … … 41 39 42 40 43 def init_random(array, n_centers, *args, **kwargs): 44 """ Init random means 45 """ 41 def init_random(data, n_centers, *args, **kwargs): 42 """ Init random means and correlations from a data table. 43 44 :param data: data table 45 :type data: :class:`Orange.data.Table` 46 :param n_centers: Number of centers and correlations to return. 47 :type n_centers: int 48 49 """ 50 if isinstance(data, Orange.data.Table): 51 array, w, c = data.toNumpyMA() 52 else: 53 array = numpy.asarray(data) 54 46 55 min, max = array.max(0), array.min(0) 47 56 dim = array.shape[1] … … 52 61 correlations = [numpy.asmatrix(numpy.eye(dim)) for i in range(n_centers)] 53 62 return means, correlations 54 63 64 def init_kmeans(data, n_centers, *args, **kwargs): 65 """ Init with kmeans algorithm. 66 67 :param data: data table 68 :type data: :class:`Orange.data.Table` 69 :param n_centers: Number of centers and correlations to return. 
70 :type n_centers: int 71 72 """ 73 if not isinstance(data, Orange.data.Table): 74 raise TypeError("Orange.data.Table instance expected!") 75 from Orange.clustering.kmeans import Clustering 76 km = Clustering(data, centroids=n_centers, maxiters=20, nstart=3) 77 centers = Orange.data.Table(km.centroids) 78 centers, w, c = centers.toNumpyMA() 79 dim = len(data.domain.attributes) 80 correlations = [numpy.asmatrix(numpy.eye(dim)) for i in range(n_centers)] 81 return centers, correlations 55 82 56 83 def prob_est1(data, mean, covariance, inv_covariance=None): 57 """ Return the probability of data given mean and covariance matrix 84 """ Return the probability of data given mean and covariance matrix 58 85 """ 59 86 data = numpy.asmatrix(data) … … 62 89 inv_covariance = numpy.linalg.pinv(covariance) 63 90 64 inv_covariance = numpy.asmatrix(inv_covariance) 91 inv_covariance = numpy.asmatrix(inv_covariance) 65 92 66 93 diff = data - mean … … 76 103 assert(det != 0.0) 77 104 p /= det 78 # if det != 0.0: 79 # p /= det 80 # else: 81 # p = numpy.ones(p.shape) / p.shape[0] 82 105 return p 83 106 84 107 85 108 def prob_est(data, weights, means, covariances, inv_covariances=None): 86 """ Return the probability estimation of data given weighted, means and 109 """ Return the probability estimation of data given weights, means and 87 110 covariances. 88 111 … … 103 126 """ An EM solver for gaussian mixture model 104 127 """ 128 _TRACE_MEAN = False 105 129 def __init__(self, data, weights, means, covariances): 106 130 self.data = data … … 182 206 """ Run the EM algorithm. 
183 207 """ 184 185 # from pylab import plot, show, draw, ion 186 # ion() 187 # plot(self.data[:, 0], self.data[:, 1], "ro") 188 # vec_plot = plot(self.means[:, 0], self.means[:, 1], "bo")[0] 208 if self._TRACE_MEAN: 209 from pylab import plot, show, draw, ion 210 ion() 211 plot(self.data[:, 0], self.data[:, 1], "ro") 212 vec_plot = plot(self.means[:, 0], self.means[:, 1], "bo")[0] 213 189 214 curr_iter = 0 190 215 … … 193 218 self.one_step() 194 219 195 # vec_plot.set_xdata(self.means[:, 0]) 196 # vec_plot.set_ydata(self.means[:, 1]) 197 # draw() 220 if self._TRACE_MEAN: 221 vec_plot.set_xdata(self.means[:, 0]) 222 vec_plot.set_ydata(self.means[:, 1]) 223 draw() 198 224 199 225 curr_iter += 1 200 print curr_iter 201 print abs(old_objective - self.log_likelihood) 226 # print curr_iter 227 # print abs(old_objective - self.log_likelihood) 202 228 if abs(old_objective - self.log_likelihood) < eps or curr_iter > max_iter: 203 229 break 204 230 205 231 206 class GASolver(object): 207 """ A toy genetic algorithm solver 208 """ 209 def __init__(self, data, weights, means, covariances): 210 raise NotImplementedError 211 212 213 class PSSolver(object): 214 """ A toy particle swarm solver 215 """ 216 def __init__(self, data, weights, means, covariances): 217 raise NotImplementedError 218 219 class HybridSolver(object): 220 """ A hybrid solver 221 """ 222 def __init__(self, data, weights, means, covariances): 223 raise NotImplementedError 232 #class GASolver(object): 233 # """ A toy genetic algorithm solver 234 # """ 235 # def __init__(self, data, weights, means, covariances): 236 # raise NotImplementedError 237 # 238 # 239 #class PSSolver(object): 240 # """ A toy particle swarm solver 241 # """ 242 # def __init__(self, data, weights, means, covariances): 243 # raise NotImplementedError 244 # 245 #class HybridSolver(object): 246 # """ A hybrid solver 247 # """ 248 # def __init__(self, data, weights, means, covariances): 249 # raise NotImplementedError 224 250 225 251 226 252 class 
GaussianMixture(object): 253 """ Computes the gaussian mixture model from an Orange dataset. 254 """ 227 255 def __new__(cls, data=None, weightId=None, **kwargs): 228 256 self = object.__new__(cls) … … 233 261 return self 234 262 235 def __init__(self, n_centers=3, init_function=init_random): 263 def __init__(self, n_centers=3, init_function=init_kmeans): 236 264 self.n_centers = n_centers 237 265 self.init_function = init_function 238 266 239 267 def __call__(self, data, weightId=None): 268 means, correlations = self.init_function(data, self.n_centers) 269 means = numpy.asmatrix(means) 240 270 array, _, _ = data.to_numpy_MA() 241 271 solver = EMSolver(array, numpy.ones((self.n_centers)) / self.n_centers, 242 *self.init_function(array, self.n_centers)) 272 means, correlations) 243 273 solver.run() 244 274 return GMModel(solver.weights, solver.means, solver.covariances) … … 246 276 247 277 def plot_model(data_array, mixture, axis=(0, 1), samples=20, contour_lines=20): 248 278 """ Plot the scaterplot of data_array and the contour lines of the 279 probability for the mixture. 
280 281 """ 249 282 import matplotlib 250 283 import matplotlib.pylab as plt … … 257 290 258 291 weights = mixture.weights 259 means = [m[axis] for m in mixture.means] 292 means = mixture.means[:, axis] 260 293 261 294 covariances = [cov[axis,:][:, axis] for cov in mixture.covariances] … … 283 316 cmap=cm.gray, extent=extent) 284 317 318 plt.plot(means[:, 0], means[:, 1], "b+") 285 319 plt.show() 286 320 287 def test(): 321 def test(seed=0): 288 322 # data = Orange.data.Table(os.path.expanduser("../../doc/datasets/brownselected.tab")) 289 data = Orange.data.Table(os.path.expanduser("~/Documents/brownselectedfss.tab")) 290 # data = Orange.data.Table("../../doc/datasets/iris.tab") 323 # data = Orange.data.Table(os.path.expanduser("~/Documents/brownselectedfss.tab")) 324 data = Orange.data.Table(os.path.expanduser("~/Documents/brownselectedfss1.tab")) 325 data = Orange.data.Table("../../doc/datasets/iris.tab") 291 326 # data = Orange.data.Table(Orange.data.Domain(data.domain[:2], None), data) 292 numpy.random.seed(0) 293 gmm = GaussianMixture(data, n_centers=3) 294 plot_model(data, gmm, axis=(0,1), samples=40, contour_lines=20) 327 numpy.random.seed(seed) 328 random.seed(seed) 329 gmm = GaussianMixture(data, n_centers=3, init_function=init_kmeans) 330 plot_model(data, gmm, axis=(0, 1), samples=40, contour_lines=100) 295 331 296 332
Note: See TracChangeset
for help on using the changeset viewer.