pyclustering  0.10.1
pyclustering is a Python, C++ data mining library.
ema.py
1 """!
2 
3 @brief Cluster analysis algorithm: Expectation-Maximization Algorithm for Gaussian Mixture Model.
4 @details Implementation based on paper @cite article::ema::1.
5 
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2020
8 @copyright BSD-3-Clause
9 
10 """
11 
12 
13 import numpy
14 import random
15 
16 from pyclustering.cluster import cluster_visualizer
17 from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
18 from pyclustering.cluster.kmeans import kmeans
19 
20 from pyclustering.utils import pi, calculate_ellipse_description, euclidean_distance_square
21 
22 from enum import IntEnum
23 
24 import matplotlib.pyplot as plt
25 import matplotlib.animation as animation
26 from matplotlib import patches
27 
28 
def gaussian(data, mean, covariance):
    """!
    @brief Calculates gaussian for dataset using specified mean (mathematical expectation) and variance or covariance in case
            multi-dimensional data.

    @param[in] data (list): Data that is used for gaussian calculation.
    @param[in] mean (float|numpy.array): Mathematical expectation used for calculation.
    @param[in] covariance (float|numpy.array): Variance or covariance matrix for calculation.

    @return (list) Value of gaussian function for each point in dataset.

    """
    dimension = float(len(data[0]))

    if dimension != 1.0:
        inv_variance = numpy.linalg.pinv(covariance)
        # NOTE(fix): the multivariate normal density normalizes by sqrt(det(covariance)),
        # not by the matrix norm that was used previously (norm only matches in 1-D).
        scale = numpy.linalg.det(covariance)
    else:
        inv_variance = 1.0 / covariance
        scale = covariance

    # abs() guards against tiny negative determinants caused by floating-point round-off.
    divider = (numpy.pi * 2.0) ** (dimension / 2.0) * numpy.sqrt(numpy.abs(scale))
    if divider != 0.0:
        right_const = 1.0 / divider
    else:
        # Degenerate (singular) covariance: the density is unbounded.
        right_const = float('inf')

    result = []

    for point in data:
        mean_delta = point - mean
        # Standard Gaussian exponent: -0.5 * (x - mu)^T * Sigma^-1 * (x - mu).
        point_gaussian = right_const * numpy.exp(-0.5 * mean_delta.dot(inv_variance).dot(numpy.transpose(mean_delta)))
        result.append(point_gaussian)

    return result
62 
63 
64 
class ema_init_type(IntEnum):
    """!
    @brief Enumeration of initialization types for Expectation-Maximization algorithm.

    """

    ## Initial means are chosen randomly from the input sample; covariances are then estimated
    ## from the clusters formed around those means.
    RANDOM_INITIALIZATION = 0

    ## Initial means are obtained by K-Means (seeded with K-Means++); covariances are then
    ## estimated from the K-Means clusters.
    KMEANS_INITIALIZATION = 1
80 
81 
82 
class ema_initializer:
    """!
    @brief Provides services for preparing initial means and covariances for Expectation-Maximization algorithm.
    @details Initialization strategy is defined by enumerator 'ema_init_type': random initialization and
              kmeans with kmeans++ initialization. Here an example of initialization using kmeans strategy:

    @code
        from pyclustering.utils import read_sample
        from pyclustering.samples.definitions import FAMOUS_SAMPLES
        from pyclustering.cluster.ema import ema_initializer

        sample = read_sample(FAMOUS_SAMPLES.SAMPLE_OLD_FAITHFUL)
        amount_clusters = 2

        initial_means, initial_covariance = ema_initializer(sample, amount_clusters).initialize()
        print(initial_means)
        print(initial_covariance)
    @endcode

    """

    ## Maximum amount of attempts to draw a unique random mean before it is perturbed instead.
    __MAX_GENERATION_ATTEMPTS = 10

    def __init__(self, sample, amount):
        """!
        @brief Constructs EM initializer.

        @param[in] sample (list): Data that will be used by the EM algorithm.
        @param[in] amount (uint): Amount of clusters that should be allocated by the EM algorithm.

        """
        self.__sample = sample
        self.__amount = amount

    def initialize(self, init_type=ema_init_type.KMEANS_INITIALIZATION):
        """!
        @brief Calculates initial parameters for EM algorithm: means and covariances using
                specified strategy.

        @param[in] init_type (ema_init_type): Strategy for initialization.

        @return (float|list, float|numpy.array) Initial means and variance (covariance matrix in case multi-dimensional data).

        """
        if init_type == ema_init_type.KMEANS_INITIALIZATION:
            return self.__initialize_kmeans()

        elif init_type == ema_init_type.RANDOM_INITIALIZATION:
            return self.__initialize_random()

        raise NameError("Unknown type of EM algorithm initialization is specified.")

    def __calculate_initial_clusters(self, centers):
        """!
        @brief Calculate Euclidean distance to each point from the each cluster.
        @brief Nearest points are captured by according clusters and as a result clusters are updated.

        @return (list) updated clusters as list of clusters. Each cluster contains indexes of objects from data.

        """
        clusters = [[] for _ in range(len(centers))]
        for index_point in range(len(self.__sample)):
            # Assign the point to its nearest center (the first closest wins on ties,
            # matching the original scan order).
            index_optim = min(range(len(centers)),
                              key=lambda index: euclidean_distance_square(self.__sample[index_point], centers[index]))
            clusters[index_optim].append(index_point)

        return clusters

    def __calculate_initial_covariances(self, initial_clusters):
        """!
        @brief Calculates covariance matrix for each of the specified clusters.

        @param[in] initial_clusters (list): Clusters (lists of point indexes) for which covariances are estimated.

        @return (list) Covariance matrix for each cluster.

        """
        covariances = []
        for initial_cluster in initial_clusters:
            if len(initial_cluster) > 1:
                cluster_sample = [self.__sample[index_point] for index_point in initial_cluster]
                covariances.append(numpy.cov(cluster_sample, rowvar=False))
            else:
                # A covariance cannot be estimated from a single point - use a small random
                # constant matrix so the cluster still has a non-degenerate spread.
                dimension = len(self.__sample[0])
                covariances.append(numpy.zeros((dimension, dimension)) + random.random() / 10.0)

        return covariances

    def __initialize_random(self):
        """!
        @brief Prepares initial means by drawing random points from the sample and estimates
                covariances from the clusters formed around them.

        @return (list, list) Initial means and covariances.

        """
        initial_means = []

        for _ in range(self.__amount):
            # NOTE(fix): random.randint's upper bound is inclusive, so the index must be
            # 'len(sample) - 1'; the previous 'randint(0, len(sample)) - 1' could yield -1,
            # which wraps to the last element and made it twice as likely to be drawn.
            mean = self.__sample[random.randint(0, len(self.__sample) - 1)]
            attempts = 0
            while (mean in initial_means) and (attempts < ema_initializer.__MAX_GENERATION_ATTEMPTS):
                mean = self.__sample[random.randint(0, len(self.__sample) - 1)]
                attempts += 1

            if mean in initial_means:
                # NOTE(fix): perturb only when no unique mean was found; previously a unique
                # mean found exactly on the last attempt was perturbed as well.
                mean = [value + (random.random() - 0.5) * value * 0.2 for value in mean]

            initial_means.append(mean)

        initial_clusters = self.__calculate_initial_clusters(initial_means)
        initial_covariance = self.__calculate_initial_covariances(initial_clusters)

        return initial_means, initial_covariance

    def __initialize_kmeans(self):
        """!
        @brief Prepares initial means using K-Means (seeded by K-Means++) and estimates
                covariances from the resulting clusters.

        @return (list, list) Initial means and covariances.

        """
        initial_centers = kmeans_plusplus_initializer(self.__sample, self.__amount).initialize()
        kmeans_instance = kmeans(self.__sample, initial_centers, ccore=True)
        kmeans_instance.process()

        means = kmeans_instance.get_centers()
        # Reuse the shared covariance estimation instead of duplicating its logic here.
        covariances = self.__calculate_initial_covariances(kmeans_instance.get_clusters())

        return means, covariances
213 
214 
215 
class ema_observer:
    """!
    @brief Observer of EM algorithm for collecting algorithm state on each step.
    @details It can be used to obtain whole picture about clustering process of EM algorithm. Allocated clusters,
              means and covariances are stored in observer on each step. Here an example of usage:

    @code
        from pyclustering.cluster.ema import ema, ema_observer
        from pyclustering.utils import read_sample
        from pyclustering.samples.definitions import SIMPLE_SAMPLES

        # Read data from text file.
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)

        # Create EM observer.
        observer = ema_observer()

        # Create EM algorithm to allocated four clusters and pass observer to it.
        ema_instance = ema(sample, 4, observer=observer)

        # Run clustering process.
        ema_instance.process()

        # Print amount of steps that were done by the algorithm.
        print("EMA steps:", observer.get_iterations())

        # Print evolution of means and covariances.
        print("Means evolution:", observer.get_evolution_means())
        print("Covariances evolution:", observer.get_evolution_covariances())

        # Print evolution of clusters.
        print("Clusters evolution:", observer.get_evolution_clusters())

        # Print final clusters.
        print("Allocated clusters:", observer.get_evolution_clusters()[-1])
    @endcode

    """

    def __init__(self):
        """!
        @brief Initializes EM observer.

        """
        # Per-iteration history of the algorithm state; the three lists always grow in lockstep.
        self.__means_evolution = []
        self.__covariances_evolution = []
        self.__clusters_evolution = []

    def __len__(self):
        """!
        @return (uint) Amount of iterations that were done by the EM algorithm.

        """
        return self.get_iterations()

    def get_iterations(self):
        """!
        @return (uint) Amount of iterations that were done by the EM algorithm.

        """
        return len(self.__means_evolution)

    def get_evolution_means(self):
        """!
        @return (list) Mean of each cluster on each step of clustering.

        """
        return self.__means_evolution

    def get_evolution_covariances(self):
        """!
        @return (list) Covariance matrix (or variance in case of one-dimensional data) of each cluster on each step of clustering.

        """
        return self.__covariances_evolution

    def get_evolution_clusters(self):
        """!
        @return (list) Allocated clusters on each step of clustering.

        """
        return self.__clusters_evolution

    def notify(self, means, covariances, clusters):
        """!
        @brief This method is used by the algorithm to notify observer about changes where the algorithm
                should provide new values: means, covariances and allocated clusters.

        @param[in] means (list): Mean of each cluster on currect step.
        @param[in] covariances (list): Covariances of each cluster on current step.
        @param[in] clusters (list): Allocated cluster on current step.

        """
        # Record the snapshot of the current iteration into each history stream.
        for history, snapshot in ((self.__means_evolution, means),
                                  (self.__covariances_evolution, covariances),
                                  (self.__clusters_evolution, clusters)):
            history.append(snapshot)
317 
318 
319 
class ema_visualizer:
    """!
    @brief Visualizer of EM algorithm's results.
    @details Provides services for visualization of particular features of the algorithm, for example,
              in case of two-dimensional dataset it shows covariance ellipses.

    """

    @staticmethod
    def show_clusters(clusters, sample, covariances, means, figure = None, display = True):
        """!
        @brief Draws clusters and in case of two-dimensional dataset draws their ellipses.

        @param[in] clusters (list): Clusters that were allocated by the algorithm.
        @param[in] sample (list): Dataset that were used for clustering.
        @param[in] covariances (list): Covariances of the clusters.
        @param[in] means (list): Means of the clusters.
        @param[in] figure (figure): If 'None' then new figure is created, otherwise specified figure is used
                    for visualization.
        @param[in] display (bool): If 'True' then figure will be shown by the method, otherwise it should be
                    shown manually using matplotlib function 'plt.show()'.

        @return (figure) Figure where clusters were drawn.

        """

        visualizer = cluster_visualizer()
        visualizer.append_clusters(clusters, sample)

        # Render clusters either into a freshly created figure or into the caller-supplied one.
        if figure is None:
            figure = visualizer.show(display = False)
        else:
            visualizer.show(figure = figure, display = False)

        # Covariance ellipses are drawable only for two-dimensional data.
        if len(sample[0]) == 2:
            ema_visualizer.__draw_ellipses(figure, visualizer, clusters, covariances, means)

        if display is True:
            plt.show()

        return figure

    @staticmethod
    def animate_cluster_allocation(data, observer, animation_velocity = 75, movie_fps = 1, save_movie = None):
        """!
        @brief Animates clustering process that is performed by EM algorithm.

        @param[in] data (list): Dataset that is used for clustering.
        @param[in] observer (ema_observer): EM observer that was used for collection information about clustering process.
        @param[in] animation_velocity (uint): Interval between frames in milliseconds (for run-time animation only).
        @param[in] movie_fps (uint): Defines frames per second (for rendering movie only).
        @param[in] save_movie (string): If it is specified then animation will be stored to file that is specified in this parameter.

        """

        figure = plt.figure()

        def init_frame():
            # First frame simply renders iteration 0.
            return frame_generation(0)

        def frame_generation(index_iteration):
            # Clear the previous frame before drawing the state of this iteration.
            figure.clf()

            figure.suptitle("EM algorithm (iteration: " + str(index_iteration) +")", fontsize = 18, fontweight = 'bold')

            # Pull the algorithm state recorded by the observer for this iteration.
            clusters = observer.get_evolution_clusters()[index_iteration]
            covariances = observer.get_evolution_covariances()[index_iteration]
            means = observer.get_evolution_means()[index_iteration]

            ema_visualizer.show_clusters(clusters, data, covariances, means, figure, False)
            # Leave room for the suptitle above the axes.
            figure.subplots_adjust(top = 0.85)

            return [ figure.gca() ]

        iterations = len(observer)
        cluster_animation = animation.FuncAnimation(figure, frame_generation, iterations, interval = animation_velocity, init_func = init_frame, repeat_delay = 5000)

        # Either render the animation to a movie file or show it interactively.
        if save_movie is not None:
            cluster_animation.save(save_movie, writer = 'ffmpeg', fps = movie_fps, bitrate = 1500)
        else:
            plt.show()

    @staticmethod
    def __draw_ellipses(figure, visualizer, clusters, covariances, means):
        # Draw one covariance ellipse per cluster on the first axes of the figure,
        # reusing the color that the cluster visualizer assigned to the cluster.
        ax = figure.get_axes()[0]

        for index in range(len(clusters)):
            angle, width, height = calculate_ellipse_description(covariances[index])
            color = visualizer.get_cluster_color(index, 0)

            ema_visualizer.__draw_ellipse(ax, means[index][0], means[index][1], angle, width, height, color)

    @staticmethod
    def __draw_ellipse(ax, x, y, angle, width, height, color):
        # Degenerate (zero-sized) ellipses are skipped entirely.
        if (width > 0.0) and (height > 0.0):
            # Mark the cluster mean, then overlay the translucent covariance ellipse.
            ax.plot(x, y, color=color, marker='x', markersize=6)
            ellipse = patches.Ellipse((x, y), width, height, alpha=0.2, angle=-angle, linewidth=2, fill=True, zorder=2, color=color)
            ax.add_patch(ellipse)
421 
422 
423 
class ema:
    """!
    @brief Expectation-Maximization clustering algorithm for Gaussian Mixture Model (GMM).
    @details The algorithm provides only clustering services (unsupervised learning).
              Here an example of data clustering process:
    @code
        from pyclustering.cluster.ema import ema, ema_visualizer
        from pyclustering.utils import read_sample
        from pyclustering.samples.definitions import FCPS_SAMPLES

        # Read data from text file.
        sample = read_sample(FCPS_SAMPLES.SAMPLE_LSUN)

        # Create EM algorithm to allocated four clusters.
        ema_instance = ema(sample, 3)

        # Run clustering process.
        ema_instance.process()

        # Get clustering results.
        clusters = ema_instance.get_clusters()
        covariances = ema_instance.get_covariances()
        means = ema_instance.get_centers()

        # Visualize obtained clustering results.
        ema_visualizer.show_clusters(clusters, sample, covariances, means)
    @endcode

    Here is clustering results of the Expectation-Maximization clustering algorithm where popular sample 'OldFaithful' was used.
    Initial random means and covariances were used in the example. The first step is presented on the left side of the figure and
    final result (the last step) is on the right side:
    @image html ema_old_faithful_clustering.png

    @see ema_visualizer
    @see ema_observer

    """

    def __init__(self, data, amount_clusters, means=None, variances=None, observer=None, tolerance=0.00001, iterations=100):
        """!
        @brief Initializes Expectation-Maximization algorithm for cluster analysis.

        @param[in] data (list): Dataset that should be analysed and where each point (object) is represented by the list of coordinates.
        @param[in] amount_clusters (uint): Amount of clusters that should be allocated.
        @param[in] means (list): Initial means of clusters (amount of means should be equal to amount of clusters for allocation).
                    If this parameter is 'None' then K-Means algorithm with K-Means++ method will be used for initialization by default.
        @param[in] variances (list): Initial cluster variances (or covariances in case of multi-dimensional data). Amount of
                    covariances should be equal to amount of clusters that should be allocated. If this parameter is 'None' then
                    K-Means algorithm with K-Means++ method will be used for initialization by default.
        @param[in] observer (ema_observer): Observer for gathering information about clustering process.
        @param[in] tolerance (float): Defines stop condition of the algorithm (when difference between current and
                    previous log-likelihood estimation is less then 'tolerance' then clustering is over).
        @param[in] iterations (uint): Additional stop condition parameter that defines maximum number of steps that can be
                    performed by the algorithm during clustering process.

        """

        self.__data = numpy.array(data)
        self.__amount_clusters = amount_clusters
        self.__tolerance = tolerance
        self.__iterations = iterations
        self.__observer = observer

        self.__means = means
        self.__variances = variances

        self.__verify_arguments()

        if (means is None) or (variances is None):
            self.__means, self.__variances = ema_initializer(data, amount_clusters).initialize(ema_init_type.KMEANS_INITIALIZATION)

        if len(self.__means) != amount_clusters:
            # The initializer may allocate fewer clusters than requested.
            self.__amount_clusters = len(self.__means)

        # NOTE(fix): internal structures are sized by the ACTUAL amount of clusters
        # (self.__amount_clusters), which may have been reduced above; previously they were
        # sized by the requested 'amount_clusters', causing a size mismatch with the means.
        self.__rc = [[0.0] * len(self.__data) for _ in range(self.__amount_clusters)]
        self.__pic = [1.0] * self.__amount_clusters
        self.__clusters = []
        self.__gaussians = [[] for _ in range(self.__amount_clusters)]
        self.__stop = False

    def process(self):
        """!
        @brief Run clustering process of the algorithm.

        @return (ema) Returns itself (EMA instance).

        """

        # Seed likelihoods so the first tolerance check always passes.
        previous_likelihood = -200000
        current_likelihood = -100000

        current_iteration = 0
        while (self.__stop is False) and (abs(previous_likelihood - current_likelihood) > self.__tolerance) and (current_iteration < self.__iterations):
            self.__expectation_step()
            self.__maximization_step()

            current_iteration += 1

            self.__extract_clusters()
            self.__notify()

            previous_likelihood = current_likelihood
            current_likelihood = self.__log_likelihood()
            self.__stop = self.__get_stop_condition()

        return self

    def get_clusters(self):
        """!
        @return (list) Allocated clusters where each cluster is represented by list of indexes of points from dataset,
                 for example, two cluster may have following representation [[0, 1, 4], [2, 3, 5, 6]].

        """
        return self.__clusters

    def get_centers(self):
        """!
        @return (list) Corresponding centers (means) of clusters.

        """

        return self.__means

    def get_covariances(self):
        """!
        @return (list) Corresponding variances (or covariances in case of multi-dimensional data) of clusters.

        """

        return self.__variances

    def get_probabilities(self):
        """!
        @brief Returns 2-dimensional list with belong probability of each object from data to cluster correspondingly,
                where that first index is for cluster and the second is for point.

        @code
            # Get belong probablities
            probabilities = ema_instance.get_probabilities()

            # Show probability of the fifth element in the first and in the second cluster
            index_point = 5
            print("Probability in the first cluster:", probabilities[0][index_point])
            print("Probability in the second cluster:", probabilities[1][index_point])
        @endcode

        @return (list) 2-dimensional list with belong probability of each object from data to cluster.

        """

        return self.__rc

    def __erase_empty_clusters(self):
        """!
        @brief Removes empty clusters together with their means, covariances, priors, gaussians and
                responsibilities, keeping all per-cluster structures aligned.

        """
        clusters, means, variances, pic, gaussians, rc = [], [], [], [], [], []

        for index_cluster in range(len(self.__clusters)):
            if len(self.__clusters[index_cluster]) > 0:
                clusters.append(self.__clusters[index_cluster])
                means.append(self.__means[index_cluster])
                variances.append(self.__variances[index_cluster])
                pic.append(self.__pic[index_cluster])
                gaussians.append(self.__gaussians[index_cluster])
                rc.append(self.__rc[index_cluster])

        if len(self.__clusters) != len(clusters):
            self.__clusters, self.__means, self.__variances, self.__pic = clusters, means, variances, pic
            self.__gaussians, self.__rc = gaussians, rc
            self.__amount_clusters = len(self.__clusters)

    def __notify(self):
        """!
        @brief Pushes the current state (means, covariances, clusters) to the observer, if any.

        """
        if self.__observer is not None:
            self.__observer.notify(self.__means, self.__variances, self.__clusters)

    def __extract_clusters(self):
        """!
        @brief Assigns each point to the cluster with the highest responsibility (hard assignment).

        """
        self.__clusters = [[] for _ in range(self.__amount_clusters)]
        for index_point in range(len(self.__data)):
            candidates = []
            for index_cluster in range(self.__amount_clusters):
                candidates.append((index_cluster, self.__rc[index_cluster][index_point]))

            index_winner = max(candidates, key=lambda candidate: candidate[1])[0]
            self.__clusters[index_winner].append(index_point)

    def __log_likelihood(self):
        """!
        @brief Calculates log-likelihood of the current mixture model over the whole dataset.

        @return (float) Log-likelihood value.

        """
        likelihood = 0.0

        for index_point in range(len(self.__data)):
            particle = 0.0
            for index_cluster in range(self.__amount_clusters):
                particle += self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point]

            # Skip zero mixture density to avoid log(0).
            if particle > 0.0:
                likelihood += numpy.log(particle)

        return likelihood

    def __probabilities(self, index_cluster, index_point):
        """!
        @brief Calculates responsibility of the specified cluster for the specified point.

        @param[in] index_cluster (uint): Index of the cluster.
        @param[in] index_point (uint): Index of the point from the dataset.

        @return (float) Responsibility value in range [0, 1].

        """
        divider = 0.0
        for i in range(self.__amount_clusters):
            divider += self.__pic[i] * self.__gaussians[i][index_point]

        if (divider != 0.0) and (divider != float('inf')):
            return self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point] / divider

        # Degenerate denominator (zero or infinite density) - fall back to full responsibility.
        return 1.0

    def __expectation_step(self):
        """!
        @brief E-step: evaluates gaussians for each cluster and recalculates responsibilities.

        """
        self.__gaussians = [[] for _ in range(self.__amount_clusters)]
        for index in range(self.__amount_clusters):
            self.__gaussians[index] = gaussian(self.__data, self.__means[index], self.__variances[index])

        self.__rc = [[0.0] * len(self.__data) for _ in range(self.__amount_clusters)]
        for index_cluster in range(self.__amount_clusters):
            for index_point in range(len(self.__data)):
                self.__rc[index_cluster][index_point] = self.__probabilities(index_cluster, index_point)

    def __maximization_step(self):
        """!
        @brief M-step: recalculates priors, means and covariances from the responsibilities;
                clusters with zero total responsibility are dropped.

        """
        self.__pic = []
        self.__means = []
        self.__variances = []

        amount_impossible_clusters = 0

        for index_cluster in range(self.__amount_clusters):
            mc = numpy.sum(self.__rc[index_cluster])

            if mc == 0.0:
                # No point is (even fractionally) assigned to this cluster - drop it.
                amount_impossible_clusters += 1
                continue

            self.__pic.append(mc / len(self.__data))
            self.__means.append(self.__update_mean(self.__rc[index_cluster], mc))
            self.__variances.append(self.__update_covariance(self.__means[-1], self.__rc[index_cluster], mc))

        self.__amount_clusters -= amount_impossible_clusters

    def __get_stop_condition(self):
        """!
        @brief Checks for a degenerate covariance which makes further iterations meaningless.

        @return (bool) True if the algorithm should stop, False otherwise.

        """
        for covariance in self.__variances:
            if numpy.linalg.norm(covariance) == 0.0:
                return True

        return False

    def __update_covariance(self, means, rc, mc):
        """!
        @brief Calculates responsibility-weighted covariance matrix for one cluster.

        @param[in] means (numpy.array): Mean of the cluster.
        @param[in] rc (list): Responsibilities of the cluster for each point.
        @param[in] mc (float): Total responsibility mass of the cluster.

        @return (numpy.array) Updated covariance matrix.

        """
        covariance = 0.0
        for index_point in range(len(self.__data)):
            deviation = numpy.array([self.__data[index_point] - means])
            covariance += rc[index_point] * deviation.T.dot(deviation)

        covariance = covariance / mc
        return covariance

    def __update_mean(self, rc, mc):
        """!
        @brief Calculates responsibility-weighted mean for one cluster.

        @param[in] rc (list): Responsibilities of the cluster for each point.
        @param[in] mc (float): Total responsibility mass of the cluster.

        @return (numpy.array) Updated mean.

        """
        mean = 0.0
        for index_point in range(len(self.__data)):
            mean += rc[index_point] * self.__data[index_point]

        mean = mean / mc
        return mean

    def __normalize_probabilities(self):
        """!
        @brief Normalizes responsibilities of each point so that they sum to 1 across clusters.

        """
        for index_point in range(len(self.__data)):
            probability = 0.0
            for index_cluster in range(len(self.__clusters)):
                probability += self.__rc[index_cluster][index_point]

            if abs(probability - 1.0) > 0.000001:
                self.__normalize_probability(index_point, probability)

    def __normalize_probability(self, index_point, probability):
        """!
        @brief Scales responsibilities of one point so that they sum to 1.

        @param[in] index_point (uint): Index of the point whose responsibilities are normalized.
        @param[in] probability (float): Current (non-unit) sum of responsibilities for the point.

        """
        if probability == 0.0:
            return

        normalization = 1.0 / probability

        for index_cluster in range(len(self.__clusters)):
            self.__rc[index_cluster][index_point] *= normalization

    def __verify_arguments(self):
        """!
        @brief Verify input parameters for the algorithm and throw exception in case of incorrectness.

        """
        if len(self.__data) == 0:
            raise ValueError("Input data is empty (size: '%d')." % len(self.__data))

        if self.__amount_clusters < 1:
            raise ValueError("Amount of clusters (current value '%d') should be greater or equal to 1." %
                             self.__amount_clusters)
pyclustering.cluster.ema.ema.__tolerance
__tolerance
Definition: ema.py:482
pyclustering.cluster.center_initializer.kmeans_plusplus_initializer
K-Means++ is an algorithm for choosing the initial centers for algorithms like K-Means or X-Means.
Definition: center_initializer.py:95
pyclustering.cluster.ema.gaussian
def gaussian(data, mean, covariance)
Calculates gaussian for dataset using specified mean (mathematical expectation) and variance or covar...
Definition: ema.py:29
pyclustering.cluster.ema.ema.get_covariances
def get_covariances(self)
Definition: ema.py:551
pyclustering.cluster.ema.ema.__means
__means
Definition: ema.py:486
pyclustering.cluster.ema.ema.__stop
__stop
Definition: ema.py:501
pyclustering.cluster.ema.ema.__log_likelihood
def __log_likelihood(self)
Definition: ema.py:618
pyclustering.cluster.ema.ema.__probabilities
def __probabilities(self, index_cluster, index_point)
Definition: ema.py:632
pyclustering.cluster.ema.ema.__pic
__pic
Definition: ema.py:498
pyclustering.cluster.kmeans.kmeans
Class implements K-Means clustering algorithm.
Definition: kmeans.py:253
pyclustering.cluster.ema.ema.__gaussians
__gaussians
Definition: ema.py:500
pyclustering.cluster.ema.ema.get_centers
def get_centers(self)
Definition: ema.py:542
pyclustering.cluster.cluster_visualizer
Common visualizer of clusters on 1D, 2D or 3D surface.
Definition: __init__.py:370
pyclustering.cluster.ema.ema_initializer.__initialize_random
def __initialize_random(self)
Definition: ema.py:174
pyclustering.cluster.ema.ema_observer
Observer of EM algorithm for collecting algorithm state on each step.
Definition: ema.py:216
pyclustering.cluster.ema.ema_initializer.__calculate_initial_clusters
def __calculate_initial_clusters(self, centers)
Calculate Euclidean distance to each point from the each cluster.
Definition: ema.py:137
pyclustering.cluster.ema.ema_observer.__covariances_evolution
__covariances_evolution
Definition: ema.py:260
pyclustering.cluster.ema.ema.__iterations
__iterations
Definition: ema.py:483
pyclustering.cluster.center_initializer
Collection of center initializers for algorithm that uses initial centers, for example,...
Definition: center_initializer.py:1
pyclustering.cluster.ema.ema.__maximization_step
def __maximization_step(self)
Definition: ema.py:654
pyclustering.cluster.ema.ema_initializer.__init__
def __init__(self, sample, amount)
Constructs EM initializer.
Definition: ema.py:106
pyclustering.cluster.ema.ema.get_clusters
def get_clusters(self)
Definition: ema.py:533
pyclustering.cluster.ema.ema.__update_mean
def __update_mean(self, rc, mc)
Definition: ema.py:693
pyclustering.cluster.ema.ema_visualizer.animate_cluster_allocation
def animate_cluster_allocation(data, observer, animation_velocity=75, movie_fps=1, save_movie=None)
Animates clustering process that is performed by EM algorithm.
Definition: ema.py:364
pyclustering.cluster.ema.ema_initializer.initialize
def initialize(self, init_type=ema_init_type.KMEANS_INITIALIZATION)
Calculates initial parameters for EM algorithm: means and covariances using specified strategy.
Definition: ema.py:118
pyclustering.cluster.ema.ema.__update_covariance
def __update_covariance(self, means, rc, mc)
Definition: ema.py:683
pyclustering.cluster.ema.ema.__observer
__observer
Definition: ema.py:484
pyclustering.cluster.ema.ema_initializer.__sample
__sample
Definition: ema.py:114
pyclustering.cluster.ema.ema_observer.get_evolution_covariances
def get_evolution_covariances(self)
Definition: ema.py:288
pyclustering.cluster.ema.ema.__notify
def __notify(self)
Definition: ema.py:600
pyclustering.cluster.ema.ema_initializer
Provides services for preparing initial means and covariances for Expectation-Maximization algorithm.
Definition: ema.py:83
pyclustering.cluster.ema.ema_observer.get_evolution_means
def get_evolution_means(self)
Definition: ema.py:280
pyclustering.cluster.ema.ema_observer.notify
def notify(self, means, covariances, clusters)
This method is used by the algorithm to notify observer about changes where the algorithm should prov...
Definition: ema.py:304
pyclustering.cluster
pyclustering module for cluster analysis.
Definition: __init__.py:1
pyclustering.cluster.ema.ema.__verify_arguments
def __verify_arguments(self)
Verify input parameters for the algorithm and throw exception in case of incorrectness.
Definition: ema.py:722
pyclustering.cluster.ema.ema.__normalize_probability
def __normalize_probability(self, index_point, probability)
Definition: ema.py:712
pyclustering.cluster.ema.ema_initializer.__amount
__amount
Definition: ema.py:115
pyclustering.cluster.ema.ema.__expectation_step
def __expectation_step(self)
Definition: ema.py:643
pyclustering.cluster.ema.ema.process
def process(self)
Run clustering process of the algorithm.
Definition: ema.py:504
pyclustering.cluster.ema.ema_init_type
Enumeration of initialization types for Expectation-Maximization algorithm.
Definition: ema.py:65
pyclustering.cluster.ema.ema.__normalize_probabilities
def __normalize_probabilities(self)
Definition: ema.py:702
pyclustering.cluster.kmeans
The module contains K-Means algorithm and other related services.
Definition: kmeans.py:1
pyclustering.cluster.ema.ema_visualizer.show_clusters
def show_clusters(clusters, sample, covariances, means, figure=None, display=True)
Draws clusters and in case of two-dimensional dataset draws their ellipses.
Definition: ema.py:329
pyclustering.cluster.ema.ema_observer.get_evolution_clusters
def get_evolution_clusters(self)
Definition: ema.py:296
pyclustering.cluster.ema.ema.__get_stop_condition
def __get_stop_condition(self)
Definition: ema.py:675
pyclustering.cluster.ema.ema.__init__
def __init__(self, data, amount_clusters, means=None, variances=None, observer=None, tolerance=0.00001, iterations=100)
Initializes Expectation-Maximization algorithm for cluster analysis.
Definition: ema.py:461
pyclustering.cluster.ema.ema_observer.__init__
def __init__(self)
Initializes EM observer.
Definition: ema.py:254
pyclustering.cluster.ema.ema
Expectation-Maximization clustering algorithm for Gaussian Mixture Model (GMM).
Definition: ema.py:424
pyclustering.cluster.ema.ema.get_probabilities
def get_probabilities(self)
Returns 2-dimensional list with belong probability of each object from data to cluster correspondingl...
Definition: ema.py:560
pyclustering.cluster.ema.ema.__data
__data
Definition: ema.py:480
pyclustering.cluster.ema.ema_observer.__means_evolution
__means_evolution
Definition: ema.py:259
pyclustering.cluster.ema.ema_observer.__clusters_evolution
__clusters_evolution
Definition: ema.py:261
pyclustering.cluster.ema.ema.__extract_clusters
def __extract_clusters(self)
Definition: ema.py:605
pyclustering.cluster.ema.ema_visualizer
Visualizer of EM algorithm's results.
Definition: ema.py:320
pyclustering.utils
Utils that are used by modules of pyclustering.
Definition: __init__.py:1
pyclustering.cluster.ema.ema.__amount_clusters
__amount_clusters
Definition: ema.py:481
pyclustering.cluster.ema.ema_observer.__len__
def __len__(self)
Definition: ema.py:264
pyclustering.cluster.ema.ema_initializer.__initialize_kmeans
def __initialize_kmeans(self)
Definition: ema.py:195
pyclustering.cluster.ema.ema_initializer.__calculate_initial_covariances
def __calculate_initial_covariances(self, initial_clusters)
Definition: ema.py:161
pyclustering.cluster.ema.ema.__variances
__variances
Definition: ema.py:487
pyclustering.cluster.ema.ema_observer.get_iterations
def get_iterations(self)
Definition: ema.py:272
pyclustering.cluster.ema.ema.__erase_empty_clusters
def __erase_empty_clusters(self)
Definition: ema.py:582
pyclustering.cluster.ema.ema.__clusters
__clusters
Definition: ema.py:499
pyclustering.cluster.ema.ema.__rc
__rc
Definition: ema.py:497