ema.py
1 """!
2
3 @brief Cluster analysis algorithm: Expectation-Maximization Algorithm for Gaussian Mixture Model.
4 @details Implementation based on paper @cite article::ema::1.
5
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2018
9
11  PyClustering is free software: you can redistribute it and/or modify
13  the Free Software Foundation, either version 3 of the License, or
14  (at your option) any later version.
15
16  PyClustering is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  GNU General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23 @endcond
24
25 """
26
27
28 import numpy
29 import random
30 import warnings
31
32 from pyclustering.cluster import cluster_visualizer
33 from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
34 from pyclustering.cluster.kmeans import kmeans
35
36 from pyclustering.utils import pi, calculate_ellipse_description, euclidean_distance_square
37
38 from enum import IntEnum
39
40 try:
41  import matplotlib.pyplot as plt
42  import matplotlib.animation as animation
43  from matplotlib import patches
44 except Exception as error_instance:
45  warnings.warn("Impossible to import matplotlib (please, install 'matplotlib'), pyclustering's visualization "
46  "functionality is not available (details: '%s')." % str(error_instance))
47
def gaussian(data, mean, covariance):
    """!
    @brief Evaluates gaussian function in each point of dataset using specified mean (mathematical expectation)
            and variance (covariance matrix in case of multi-dimensional data).

    @param[in] data (list): Points for which gaussian should be evaluated.
    @param[in] mean (float|numpy.array): Mathematical expectation of the distribution.
    @param[in] covariance (float|numpy.array): Variance or covariance matrix of the distribution.

    @return (list) Gaussian value for each point of the dataset (order of points is preserved).

    """
    dimension = float(len(data[0]))

    # Scalar variance is inverted directly, covariance matrix is pseudo-inverted.
    inv_variance = 1.0 / covariance if dimension == 1.0 else numpy.linalg.pinv(covariance)

    # Normalization constant; zero divider is mapped to infinity instead of raising division error.
    divider = (pi * 2.0) ** (dimension / 2.0) * numpy.sqrt(numpy.linalg.norm(covariance))
    right_const = float('inf') if divider == 0.0 else 1.0 / divider

    def evaluate(point):
        mean_delta = point - mean
        exponent = -0.5 * mean_delta.dot(inv_variance).dot(numpy.transpose(mean_delta))
        return right_const * numpy.exp(exponent)

    return [evaluate(point) for point in data]
81
82
83
class ema_init_type(IntEnum):
    """!
    @brief Strategies that can be used to prepare initial means and covariances for
            Expectation-Maximization algorithm.

    """

    ## Initial means are picked randomly from the input data.
    RANDOM_INITIALIZATION = 0

    ## Initial means and covariances are obtained from clusters allocated by K-Means algorithm
    ## whose centers are prepared by K-Means++.
    KMEANS_INITIALIZATION = 1
99
100
101
103  """!
104  @brief Provides services for preparing initial means and covariances for Expectation-Maximization algorithm.
105  @details Initialization strategy is defined by enumerator 'ema_init_type': random initialization and
106  kmeans with kmeans++ initialization. Here an example of initialization using kmeans strategy:
107
108  @code
110  from pyclustering.samples.definitions import FAMOUS_SAMPLES;
111  from pyclustering.cluster.ema import ema_initializer;
112
114  amount_clusters = 2;
115
116  initial_means, initial_covariance = ema_initializer(sample, amount_clusters).initialize(initializer);
117  print(initial_means);
118  print(initial_covariance);
119  @endcode
120
121  """
122
123  __MAX_GENERATION_ATTEMPTS = 10
124
125  def __init__(self, sample, amount):
126  """!
127  @brief Constructs EM initializer.
128
129  @param[in] sample (list): Data that will be used by the EM algorithm.
130  @param[in] amount (uint): Amount of clusters that should be allocated by the EM algorithm.
131
132  """
133  self.__sample = sample
134  self.__amount = amount
135
136
137  def initialize(self, init_type = ema_init_type.KMEANS_INITIALIZATION):
138  """!
139  @brief Calculates initial parameters for EM algorithm: means and covariances using
140  specified strategy.
141
142  @param[in] init_type (ema_init_type): Strategy for initialization.
143
144  @return (float|list, float|numpy.array) Initial means and variance (covariance matrix in case multi-dimensional data).
145
146  """
147  if init_type == ema_init_type.KMEANS_INITIALIZATION:
148  return self.__initialize_kmeans()
149
150  elif init_type == ema_init_type.RANDOM_INITIALIZATION:
151  return self.__initialize_random()
152
153  raise NameError("Unknown type of EM algorithm initialization is specified.")
154
155
156  def __calculate_initial_clusters(self, centers):
157  """!
158  @brief Calculate Euclidean distance to each point from the each cluster.
159  @brief Nearest points are captured by according clusters and as a result clusters are updated.
160
161  @return (list) updated clusters as list of clusters. Each cluster contains indexes of objects from data.
162
163  """
164
165  clusters = [[] for _ in range(len(centers))]
166  for index_point in range(len(self.__sample)):
167  index_optim, dist_optim = -1, 0.0
168
169  for index in range(len(centers)):
170  dist = euclidean_distance_square(self.__sample[index_point], centers[index])
171
172  if (dist < dist_optim) or (index is 0):
173  index_optim, dist_optim = index, dist
174
175  clusters[index_optim].append(index_point)
176
177  return clusters
178
179
180  def __calculate_initial_covariances(self, initial_clusters):
181  covariances = []
182  for initial_cluster in initial_clusters:
183  if len(initial_cluster) > 1:
184  cluster_sample = [ self.__sample[index_point] for index_point in initial_cluster ]
185  covariances.append(numpy.cov(cluster_sample, rowvar = False))
186  else:
187  dimension = len(self.__sample[0])
188  covariances.append(numpy.zeros((dimension, dimension)) + random.random() / 10.0)
189
190  return covariances
191
192
193  def __initialize_random(self):
194  initial_means = []
195
196  for _ in range(self.__amount):
197  mean = self.__sample[ random.randint(0, len(self.__sample)) - 1 ]
198  attempts = 0
199  while (mean in initial_means) and (attempts < ema_initializer.__MAX_GENERATION_ATTEMPTS):
200  mean = self.__sample[ random.randint(0, len(self.__sample)) - 1 ]
201  attempts += 1
202
203  if attempts == ema_initializer.__MAX_GENERATION_ATTEMPTS:
204  mean = [ value + (random.random() - 0.5) * value * 0.2 for value in mean ]
205
206  initial_means.append(mean)
207
208  initial_clusters = self.__calculate_initial_clusters(initial_means)
209  initial_covariance = self.__calculate_initial_covariances(initial_clusters)
210
211  return initial_means, initial_covariance
212
213
214  def __initialize_kmeans(self):
215  initial_centers = kmeans_plusplus_initializer(self.__sample, self.__amount).initialize()
216  kmeans_instance = kmeans(self.__sample, initial_centers, ccore = True)
217  kmeans_instance.process()
218
219  means = kmeans_instance.get_centers()
220
221  covariances = []
222  initial_clusters = kmeans_instance.get_clusters()
223  for initial_cluster in initial_clusters:
224  if len(initial_cluster) > 1:
225  cluster_sample = [ self.__sample[index_point] for index_point in initial_cluster ]
226  covariances.append(numpy.cov(cluster_sample, rowvar = False))
227  else:
228  dimension = len(self.__sample[0])
229  covariances.append(numpy.zeros((dimension, dimension)) + random.random() / 10.0)
230
231  return means, covariances
232
233
234
236  """!
237  @brief Observer of EM algorithm for collecting algorithm state on each step.
238  @details It can be used to obtain whole picture about clustering process of EM algorithm. Allocated clusters,
239  means and covariances are stored in observer on each step. Here an example of usage:
240
241  @code
242  from pyclustering.cluster.ema import ema, ema_observer;
244  from pyclustering.samples.definitions import SIMPLE_SAMPLES;
245
246  # Read data from text file
248
249  # Create EM observer
250  observer = ema_observer();
251
252  # Create EM algorithm to allocated four clusters and pass observer to it
253  ema_instance = ema(sample, 4, observer=observer);
254
255  # Run clustering process
256  ema_instance.process();
257
258  # Print amount of steps that were done by the algorithm
259  print("EMA steps:", observer.get_iterations());
260
261  # Print evolution of means and covariances
262  print("Means evolution:", observer.get_evolution_means());
263  print("Covariances evolution:", observer.get_evolution_covariances());
264
265  # Print evolution of clusters
266  print("Clusters evolution:", observer.get_evolution_clusters());
267
268  # Print final clusters
269  print("Allocated clusters:", observer.get_evolution_clusters()[-1]);
270  @endcode
271
272  """
273  def __init__(self):
274  """!
275  @brief Initializes EM observer.
276
277  """
278  self.__means_evolution = []
279  self.__covariances_evolution = []
280  self.__clusters_evolution = []
281
282
283  def __len__(self):
284  """!
285  @return (uint) Amount of iterations that were done by the EM algorithm.
286
287  """
288  return len(self.__means_evolution)
289
290
291  def get_iterations(self):
292  """!
293  @return (uint) Amount of iterations that were done by the EM algorithm.
294
295  """
296  return len(self.__means_evolution)
297
298
300  """!
301  @return (list) Mean of each cluster on each step of clustering.
302
303  """
304  return self.__means_evolution
305
306
308  """!
309  @return (list) Covariance matrix (or variance in case of one-dimensional data) of each cluster on each step of clustering.
310
311  """
312  return self.__covariances_evolution
313
314
316  """!
317  @return (list) Allocated clusters on each step of clustering.
318
319  """
320  return self.__clusters_evolution
321
322
323  def notify(self, means, covariances, clusters):
324  """!
325  @brief This method is used by the algorithm to notify observer about changes where the algorithm
326  should provide new values: means, covariances and allocated clusters.
327
328  @param[in] means (list): Mean of each cluster on currect step.
329  @param[in] covariances (list): Covariances of each cluster on current step.
330  @param[in] clusters (list): Allocated cluster on current step.
331
332  """
333  self.__means_evolution.append(means)
334  self.__covariances_evolution.append(covariances)
335  self.__clusters_evolution.append(clusters)
336
337
338
340  """!
341  @brief Visualizer of EM algorithm's results.
342  @details Provides services for visualization of particular features of the algorithm, for example,
343  in case of two-dimensional dataset it shows covariance ellipses.
344
345  """
346
347  @staticmethod
348  def show_clusters(clusters, sample, covariances, means, figure = None, display = True):
349  """!
350  @brief Draws clusters and in case of two-dimensional dataset draws their ellipses.
351
352  @param[in] clusters (list): Clusters that were allocated by the algorithm.
353  @param[in] sample (list): Dataset that were used for clustering.
354  @param[in] covariances (list): Covariances of the clusters.
355  @param[in] means (list): Means of the clusters.
356  @param[in] figure (figure): If 'None' then new is figure is creater, otherwise specified figure is used
357  for visualization.
358  @param[in] display (bool): If 'True' then figure will be shown by the method, otherwise it should be
359  shown manually using matplotlib function 'plt.show()'.
360
361  @return (figure) Figure where clusters were drawn.
362
363  """
364
365  visualizer = cluster_visualizer()
366  visualizer.append_clusters(clusters, sample)
367
368  if figure is None:
369  figure = visualizer.show(display = False)
370  else:
371  visualizer.show(figure = figure, display = False)
372
373  if len(sample[0]) == 2:
374  ema_visualizer.__draw_ellipses(figure, visualizer, clusters, covariances, means)
375
376  if display is True:
377  plt.show()
378
379  return figure
380
381
382  @staticmethod
383  def animate_cluster_allocation(data, observer, animation_velocity = 75, movie_fps = 1, save_movie = None):
384  """!
385  @brief Animates clustering process that is performed by EM algorithm.
386
387  @param[in] data (list): Dataset that is used for clustering.
388  @param[in] observer (ema_observer): EM observer that was used for collection information about clustering process.
389  @param[in] animation_velocity (uint): Interval between frames in milliseconds (for run-time animation only).
390  @param[in] movie_fps (uint): Defines frames per second (for rendering movie only).
391  @param[in] save_movie (string): If it is specified then animation will be stored to file that is specified in this parameter.
392
393  """
394
395  figure = plt.figure()
396
397  def init_frame():
398  return frame_generation(0)
399
400  def frame_generation(index_iteration):
401  figure.clf()
402
403  figure.suptitle("EM algorithm (iteration: " + str(index_iteration) +")", fontsize = 18, fontweight = 'bold')
404
405  clusters = observer.get_evolution_clusters()[index_iteration]
406  covariances = observer.get_evolution_covariances()[index_iteration]
407  means = observer.get_evolution_means()[index_iteration]
408
409  ema_visualizer.show_clusters(clusters, data, covariances, means, figure, False)
411
412  return [ figure.gca() ]
413
414  iterations = len(observer)
415  cluster_animation = animation.FuncAnimation(figure, frame_generation, iterations, interval = animation_velocity, init_func = init_frame, repeat_delay = 5000)
416
417  if save_movie is not None:
418  cluster_animation.save(save_movie, writer = 'ffmpeg', fps = movie_fps, bitrate = 1500)
419  else:
420  plt.show()
421
422
423  @staticmethod
424  def __draw_ellipses(figure, visualizer, clusters, covariances, means):
425  ax = figure.get_axes()[0]
426
427  for index in range(len(clusters)):
428  angle, width, height = calculate_ellipse_description(covariances[index])
429  color = visualizer.get_cluster_color(index, 0)
430
431  ema_visualizer.__draw_ellipse(ax, means[index][0], means[index][1], angle, width, height, color)
432
433
434  @staticmethod
435  def __draw_ellipse(ax, x, y, angle, width, height, color):
436  if (width > 0.0) and (height > 0.0):
437  ax.plot(x, y, color=color, marker='x', markersize=6)
438  ellipse = patches.Ellipse((x, y), width, height, alpha=0.2, angle=-angle, linewidth=2, fill=True, zorder=2, color=color)
440
441
442
class ema:
    """!
    @brief Expectation-Maximization clustering algorithm for Gaussian Mixture Model (GMM).
    @details The algorithm provides only clustering services (unsupervised learning).
              Here an example of data clustering process:
    @code
        # Dataset for clustering: list of points where each point is a list of coordinates
        sample = [[1.0, 1.2], [1.1, 0.9], [0.9, 1.0], [5.0, 5.1], [5.2, 4.9], [4.9, 5.0]]

        # Amount of cluster that should be allocated
        amount = 2

        # Prepare initial means and covariances using K-Means initializer
        initializer = ema_init_type.KMEANS_INITIALIZATION
        initial_means, initial_covariance = ema_initializer(sample, amount).initialize(initializer)

        # Lets create observer to see clustering process
        observer = ema_observer()

        # Create instance of the EM algorithm
        ema_instance = ema(sample, amount, initial_means, initial_covariance, observer=observer)

        # Run clustering process
        ema_instance.process()

        # Extract clusters
        clusters = ema_instance.get_clusters()
        print("Obtained clusters:", clusters)

        # Display allocated clusters using visualizer
        covariances = ema_instance.get_covariances()
        means = ema_instance.get_centers()
        ema_visualizer.show_clusters(clusters, sample, covariances, means)

        # Show animation process
        ema_visualizer.animate_cluster_allocation(sample, observer)

    @endcode

    Here is clustering results of the Expectation-Maximization clustering algorithm where popular sample 'OldFaithful' was used.
    Initial random means and covariances were used in the example. The first step is presented on the left side of the figure and
    final result (the last step) is on the right side:
    @image html ema_old_faithful_clustering.png

    @see ema_visualizer
    @see ema_observer

    """
    def __init__(self, data, amount_clusters, means = None, variances = None, observer = None, tolerance = 0.00001, iterations = 100):
        """!
        @brief Initializes Expectation-Maximization algorithm for cluster analysis.

        @param[in] data (list): Dataset that should be analysed and where each point (object) is represented by the list of coordinates.
        @param[in] amount_clusters (uint): Amount of clusters that should be allocated.
        @param[in] means (list): Initial means of clusters (amount of means should be equal to amount of clusters for allocation).
                    If this parameter is 'None' then K-Means algorithm with K-Means++ method will be used for initialization by default.
        @param[in] variances (list): Initial cluster variances (or covariances in case of multi-dimensional data). Amount of
                    covariances should be equal to amount of clusters that should be allocated. If this parameter is 'None' then
                    K-Means algorithm with K-Means++ method will be used for initialization by default.
        @param[in] observer (ema_observer): Observer for gathering information about clustering process.
        @param[in] tolerance (float): Defines stop condition of the algorithm (when difference between current and
                    previous log-likelihood estimation is less than 'tolerance' then clustering is over).
        @param[in] iterations (uint): Additional stop condition parameter that defines maximum number of steps that can be
                    performed by the algorithm during clustering process.

        """

        self.__data = numpy.array(data)
        self.__amount_clusters = amount_clusters
        self.__tolerance = tolerance
        self.__iterations = iterations
        self.__observer = observer

        self.__means = means
        self.__variances = variances

        if (means is None) or (variances is None):
            self.__means, self.__variances = ema_initializer(data, amount_clusters).initialize(ema_init_type.KMEANS_INITIALIZATION)

        # Amount of clusters follows amount of means that are actually available, and every internal
        # structure below is sized by this value (not by the argument) to keep them aligned with means.
        if len(self.__means) != self.__amount_clusters:
            self.__amount_clusters = len(self.__means)

        self.__rc = [ [0.0] * len(self.__data) for _ in range(self.__amount_clusters) ]
        self.__pic = [1.0] * self.__amount_clusters
        self.__clusters = []
        self.__gaussians = [ [] for _ in range(self.__amount_clusters) ]
        self.__stop = False


    def process(self):
        """!
        @brief Run clustering process of the algorithm.
        @details This method should be called before call 'get_clusters()'. Expectation and maximization steps
                  are repeated until change of log-likelihood is not greater than tolerance, or maximum amount
                  of iterations is reached, or covariance with zero norm appears.

        """

        # Artificial values that guarantee the first iteration regardless of tolerance.
        previous_likelihood = -200000
        current_likelihood = -100000

        current_iteration = 0
        while (self.__stop is False) and (abs(previous_likelihood - current_likelihood) > self.__tolerance) and (current_iteration < self.__iterations):
            self.__expectation_step()
            self.__maximization_step()

            current_iteration += 1

            self.__extract_clusters()
            self.__notify()

            previous_likelihood = current_likelihood
            current_likelihood = self.__log_likelihood()
            self.__stop = self.__get_stop_condition()

        # Make sure that probabilities of each point form a distribution over remaining clusters.
        self.__normalize_probabilities()


    def get_clusters(self):
        """!
        @return (list) Allocated clusters where each cluster is represented by list of indexes of points from dataset,
                        for example, two cluster may have following representation [[0, 1, 4], [2, 3, 5, 6]].

        """
        return self.__clusters


    def get_centers(self):
        """!
        @return (list) Corresponding centers (means) of clusters.

        """

        return self.__means


    def get_covariances(self):
        """!
        @return (list) Corresponding variances (or covariances in case of multi-dimensional data) of clusters.

        """

        return self.__variances


    def get_probabilities(self):
        """!
        @brief Returns 2-dimensional list with belong probability of each object from data to cluster correspondingly,
                where the first index is for cluster and the second is for point.

        @code
            # Get belong probabilities
            probabilities = ema_instance.get_probabilities()

            # Show probability of the point with index 5 in the first and in the second cluster
            index_point = 5
            print("Probability in the first cluster:", probabilities[0][index_point])
            print("Probability in the second cluster:", probabilities[1][index_point])
        @endcode

        @return (list) 2-dimensional list with belong probability of each object from data to cluster.

        """

        return self.__rc


    def __erase_empty_clusters(self):
        """!
        @brief Removes clusters without points together with their means, variances, weights, gaussians
                and probabilities, and updates amount of clusters.

        """
        clusters, means, variances, pic, gaussians, rc = [], [], [], [], [], []

        for index_cluster, cluster in enumerate(self.__clusters):
            if len(cluster) > 0:
                clusters.append(cluster)
                means.append(self.__means[index_cluster])
                variances.append(self.__variances[index_cluster])
                pic.append(self.__pic[index_cluster])
                gaussians.append(self.__gaussians[index_cluster])
                rc.append(self.__rc[index_cluster])

        if len(self.__clusters) != len(clusters):
            self.__clusters, self.__means, self.__variances, self.__pic = clusters, means, variances, pic
            self.__gaussians, self.__rc = gaussians, rc
            self.__amount_clusters = len(self.__clusters)


    def __notify(self):
        """!
        @brief Passes current means, variances and clusters to the observer if it is specified.

        """
        if self.__observer is not None:
            self.__observer.notify(self.__means, self.__variances, self.__clusters)


    def __extract_clusters(self):
        """!
        @brief Assigns each point to the cluster with the highest belong probability (the cluster with the
                lowest index wins in case of equal probabilities) and removes clusters that stay empty.

        """
        self.__clusters = [ [] for _ in range(self.__amount_clusters) ]
        for index_point in range(len(self.__data)):
            candidates = [ (index_cluster, self.__rc[index_cluster][index_point]) for index_cluster in range(self.__amount_clusters) ]

            index_winner = max(candidates, key = lambda candidate : candidate[1])[0]
            self.__clusters[index_winner].append(index_point)

        # Clusters without points must not be reported and must not take part in further steps.
        self.__erase_empty_clusters()


    def __log_likelihood(self):
        """!
        @brief Calculates log-likelihood of the data using current cluster weights and gaussians.
        @details Points whose mixture value is not positive are ignored.

        @return (float) Log-likelihood estimation.

        """
        likelihood = 0.0

        for index_point in range(len(self.__data)):
            particle = 0.0
            for index_cluster in range(self.__amount_clusters):
                particle += self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point]

            if particle > 0.0:
                likelihood += numpy.log(particle)

        return likelihood


    def __probabilities(self, index_cluster, index_point):
        """!
        @brief Calculates belong probability of the point to the cluster.

        @param[in] index_cluster (uint): Index of the cluster.
        @param[in] index_point (uint): Index of the point.

        @return (float) Belong probability, or 1.0 when normalization divider is zero or infinite.

        """
        divider = 0.0
        for i in range(self.__amount_clusters):
            divider += self.__pic[i] * self.__gaussians[i][index_point]

        if (divider != 0.0) and (divider != float('inf')):
            return self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point] / divider

        return 1.0


    def __expectation_step(self):
        """!
        @brief Expectation step: evaluates gaussians of each cluster for the data and updates belong
                probabilities of each point to each cluster.

        """
        self.__gaussians = [ gaussian(self.__data, self.__means[index], self.__variances[index])
                             for index in range(self.__amount_clusters) ]

        self.__rc = [ [ self.__probabilities(index_cluster, index_point) for index_point in range(len(self.__data)) ]
                      for index_cluster in range(self.__amount_clusters) ]


    def __maximization_step(self):
        """!
        @brief Maximization step: updates weight, mean and covariance of each cluster using belong probabilities.
        @details Cluster whose total probability is zero is removed. Its probabilities and gaussians are
                  removed as well, otherwise rows of these structures would not correspond to remaining means.

        """
        pic, means, variances, rc, gaussians = [], [], [], [], []

        for index_cluster in range(self.__amount_clusters):
            cluster_rc = self.__rc[index_cluster]
            mc = numpy.sum(cluster_rc)

            # The cluster is not responsible for any point - it is impossible to update its parameters.
            if mc == 0.0:
                continue

            mean = self.__update_mean(cluster_rc, mc)

            pic.append( mc / len(self.__data) )
            means.append(mean)
            variances.append( self.__update_covariance(mean, cluster_rc, mc) )
            rc.append(cluster_rc)
            gaussians.append(self.__gaussians[index_cluster])

        self.__pic, self.__means, self.__variances = pic, means, variances
        self.__rc, self.__gaussians = rc, gaussians
        self.__amount_clusters = len(means)


    def __get_stop_condition(self):
        """!
        @return (bool) True if there is a covariance whose norm is equal to zero, otherwise False.

        """
        return any(numpy.linalg.norm(covariance) == 0.0 for covariance in self.__variances)


    def __update_covariance(self, means, rc, mc):
        """!
        @brief Calculates covariance of a cluster as sum of outer products of deviations from the mean
                weighted by belong probabilities and divided by total probability.

        @param[in] means (numpy.array): Mean of the cluster.
        @param[in] rc (list): Belong probability of each point to the cluster.
        @param[in] mc (float): Sum of belong probabilities of the cluster.

        @return (numpy.array) Updated covariance of the cluster.

        """
        covariance = 0.0
        for index_point in range(len(self.__data)):
            deviation = numpy.array( [ self.__data[index_point] - means ])
            covariance += rc[index_point] * deviation.T.dot(deviation)

        return covariance / mc


    def __update_mean(self, rc, mc):
        """!
        @brief Calculates mean of a cluster as sum of points weighted by belong probabilities and divided
                by total probability.

        @param[in] rc (list): Belong probability of each point to the cluster.
        @param[in] mc (float): Sum of belong probabilities of the cluster.

        @return (numpy.array) Updated mean of the cluster.

        """
        mean = 0.0
        for index_point in range(len(self.__data)):
            mean += rc[index_point] * self.__data[index_point]

        return mean / mc


    def __normalize_probabilities(self):
        """!
        @brief Normalizes belong probabilities of each point whose sum over allocated clusters differs from 1.0.

        """
        for index_point in range(len(self.__data)):
            probability = 0.0
            for index_cluster in range(len(self.__clusters)):
                probability += self.__rc[index_cluster][index_point]

            if abs(probability - 1.0) > 0.000001:
                self.__normalize_probability(index_point, probability)


    def __normalize_probability(self, index_point, probability):
        """!
        @brief Scales belong probabilities of the point so that their sum is equal to 1.0.
        @details Nothing is changed when the sum of probabilities is zero.

        @param[in] index_point (uint): Index of the point whose probabilities should be normalized.
        @param[in] probability (float): Current sum of belong probabilities of the point.

        """
        if probability == 0.0:
            return

        normalization = 1.0 / probability

        for index_cluster in range(len(self.__clusters)):
            self.__rc[index_cluster][index_point] *= normalization
Common visualizer of clusters on 1D, 2D or 3D surface.
Definition: __init__.py:359
pyclustering module for cluster analysis.
Definition: __init__.py:1
def __init__(self, sample, amount)
Constructs EM initializer.
Definition: ema.py:125
Cluster analysis algorithm: K-Means.
Definition: kmeans.py:1
def __update_covariance(self, means, rc, mc)
Definition: ema.py:709
def __probabilities(self, index_cluster, index_point)
Definition: ema.py:658
Utils that are used by modules of pyclustering.
Definition: __init__.py:1
def animate_cluster_allocation(data, observer, animation_velocity=75, movie_fps=1, save_movie=None)
Animates clustering process that is performed by EM algorithm.
Definition: ema.py:383
def __extract_clusters(self)
Definition: ema.py:631
K-Means++ is an algorithm for choosing the initial centers for algorithms like K-Means or X-Means...
Expectation-Maximization clustering algorithm for Gaussian Mixture Model (GMM).
Definition: ema.py:443
def get_covariances(self)
Definition: ema.py:577
def __get_stop_condition(self)
Definition: ema.py:701
def gaussian(data, mean, covariance)
Calculates gaussian for dataset using specified mean (mathematical expectation) and variance or covar...
Definition: ema.py:48
def notify(self, means, covariances, clusters)
This method is used by the algorithm to notify observer about changes where the algorithm should prov...
Definition: ema.py:323
def __expectation_step(self)
Definition: ema.py:669
Observer of EM algorithm for collecting algorithm state on each step.
Definition: ema.py:235
Class represents K-Means clustering algorithm.
Definition: kmeans.py:272
def __calculate_initial_clusters(self, centers)
Calculate Euclidean distance to each point from the each cluster.
Definition: ema.py:156
def initialize(self, init_type=ema_init_type.KMEANS_INITIALIZATION)
Calculates initial parameters for EM algorithm: means and covariances using specified strategy...
Definition: ema.py:137
def __init__(self)
Initializes EM observer.
Definition: ema.py:273
def show_clusters(clusters, sample, covariances, means, figure=None, display=True)
Draws clusters and in case of two-dimensional dataset draws their ellipses.
Definition: ema.py:348
def __normalize_probabilities(self)
Definition: ema.py:728
Provides servies for preparing initial means and covariances for Expectation-Maximization algorithm...
Definition: ema.py:102
def __update_mean(self, rc, mc)
Definition: ema.py:719
def __maximization_step(self)
Definition: ema.py:680
Collection of center initializers for algorithm that uses initial centers, for example, for K-Means or X-Means.
def process(self)
Run clustering process of the algorithm.
Definition: ema.py:532
def get_clusters(self)
Definition: ema.py:559
def get_probabilities(self)
Returns 2-dimensional list with belong probability of each object from data to cluster correspondingl...
Definition: ema.py:586
Visualizer of EM algorithm&#39;s results.
Definition: ema.py:339
def __calculate_initial_covariances(self, initial_clusters)
Definition: ema.py:180
Enumeration of initialization types for Expectation-Maximization algorithm.
Definition: ema.py:84
def __log_likelihood(self)
Definition: ema.py:644
def __normalize_probability(self, index_point, probability)
Definition: ema.py:738
def __init__(self, data, amount_clusters, means=None, variances=None, observer=None, tolerance=0.00001, iterations=100)
Initializes Expectation-Maximization algorithm for cluster analysis.
Definition: ema.py:491
def __erase_empty_clusters(self)
Definition: ema.py:608