 ema.py
1 """!
2
3 @brief Cluster analysis algorithm: Expectation-Maximization Algorithm for Gaussian Mixture Model.
4 @details Implementation based on paper @cite article::ema::1.
5
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2019
9
@cond GNU_PUBLIC_LICENSE
    PyClustering is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
14  (at your option) any later version.
15
16  PyClustering is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  GNU General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23 @endcond
24
25 """
26
27
28 import numpy
29 import random
30 import warnings
31
32 from pyclustering.cluster import cluster_visualizer
33 from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
34 from pyclustering.cluster.kmeans import kmeans
35
36 from pyclustering.utils import pi, calculate_ellipse_description, euclidean_distance_square
37
38 from enum import IntEnum
39
40 try:
41  import matplotlib.pyplot as plt
42  import matplotlib.animation as animation
43  from matplotlib import patches
44 except Exception as error_instance:
45  warnings.warn("Impossible to import matplotlib (please, install 'matplotlib'), pyclustering's visualization "
46  "functionality is not available (details: '%s')." % str(error_instance))
47
def gaussian(data, mean, covariance):
    """!
    @brief Calculates gaussian for dataset using specified mean (mathematical expectation) and variance or covariance in case
            multi-dimensional data.
    @details Dimension of the space is taken from the amount of coordinates of a single point. The normalizing
              constant is built from the norm of the covariance (as computed by 'numpy.linalg.norm'); if it is
              equal to zero then infinity is used as the constant.

    @param[in] data (list): Data that is used for gaussian calculation, each point is a sequence of coordinates.
    @param[in] mean (float|numpy.array): Mathematical expectation used for calculation.
    @param[in] covariance (float|numpy.array): Variance or covariance matrix for calculation.

    @return (list) Value of gaussian function for each point in dataset (empty list for empty dataset).

    """
    if len(data) == 0:
        return []

    # Dimension is the amount of coordinates of a point, not the amount of points in the dataset.
    dimension = float(len(data[0]))

    if dimension != 1.0:
        # Pseudo-inverse is used so that singular covariance matrices do not raise an error.
        inv_variance = numpy.linalg.pinv(covariance)
    else:
        inv_variance = 1.0 / covariance

    divider = (pi * 2.0) ** (dimension / 2.0) * numpy.sqrt(numpy.linalg.norm(covariance))
    if divider != 0.0:
        right_const = 1.0 / divider
    else:
        right_const = float('inf')

    result = []
    for point in data:
        mean_delta = point - mean
        point_gaussian = right_const * numpy.exp(-0.5 * mean_delta.dot(inv_variance).dot(numpy.transpose(mean_delta)))
        result.append(point_gaussian)

    return result
81
82
83
class ema_init_type(IntEnum):
    """!
    @brief Initialization strategies that can be requested from the initializer of the
            Expectation-Maximization algorithm.

    """

    ## Initial parameters are produced by the random initialization strategy.
    RANDOM_INITIALIZATION = 0

    ## Initial parameters are produced by the K-Means based initialization strategy.
    KMEANS_INITIALIZATION = 1
99
100
101
class ema_initializer:
    """!
    @brief Provides services for preparing initial means and covariances for Expectation-Maximization algorithm.
    @details Initialization strategy is defined by enumerator 'ema_init_type': random initialization and
              kmeans with kmeans++ initialization. Here an example of initialization using kmeans strategy:

    @code
        from pyclustering.utils import read_sample
        from pyclustering.samples.definitions import FAMOUS_SAMPLES
        from pyclustering.cluster.ema import ema_initializer

        sample = read_sample(FAMOUS_SAMPLES.SAMPLE_OLD_FAITHFUL)
        amount_clusters = 2

        initial_means, initial_covariance = ema_initializer(sample, amount_clusters).initialize()
        print(initial_means)
        print(initial_covariance)
    @endcode

    """

    # Upper bound of re-draw attempts when a randomly chosen mean duplicates an already chosen one.
    __MAX_GENERATION_ATTEMPTS = 10
124
125  def __init__(self, sample, amount):
126  """!
127  @brief Constructs EM initializer.
128
129  @param[in] sample (list): Data that will be used by the EM algorithm.
130  @param[in] amount (uint): Amount of clusters that should be allocated by the EM algorithm.
131
132  """
133  self.__sample = sample
134  self.__amount = amount
135
136
137  def initialize(self, init_type = ema_init_type.KMEANS_INITIALIZATION):
138  """!
139  @brief Calculates initial parameters for EM algorithm: means and covariances using
140  specified strategy.
141
142  @param[in] init_type (ema_init_type): Strategy for initialization.
143
144  @return (float|list, float|numpy.array) Initial means and variance (covariance matrix in case multi-dimensional data).
145
146  """
147  if init_type == ema_init_type.KMEANS_INITIALIZATION:
148  return self.__initialize_kmeans()
149
150  elif init_type == ema_init_type.RANDOM_INITIALIZATION:
151  return self.__initialize_random()
152
153  raise NameError("Unknown type of EM algorithm initialization is specified.")
154
155
156  def __calculate_initial_clusters(self, centers):
157  """!
158  @brief Calculate Euclidean distance to each point from the each cluster.
159  @brief Nearest points are captured by according clusters and as a result clusters are updated.
160
161  @return (list) updated clusters as list of clusters. Each cluster contains indexes of objects from data.
162
163  """
164
165  clusters = [[] for _ in range(len(centers))]
166  for index_point in range(len(self.__sample)):
167  index_optim, dist_optim = -1, 0.0
168
169  for index in range(len(centers)):
170  dist = euclidean_distance_square(self.__sample[index_point], centers[index])
171
172  if (dist < dist_optim) or (index is 0):
173  index_optim, dist_optim = index, dist
174
175  clusters[index_optim].append(index_point)
176
177  return clusters
178
179
180  def __calculate_initial_covariances(self, initial_clusters):
181  covariances = []
182  for initial_cluster in initial_clusters:
183  if len(initial_cluster) > 1:
184  cluster_sample = [ self.__sample[index_point] for index_point in initial_cluster ]
185  covariances.append(numpy.cov(cluster_sample, rowvar = False))
186  else:
187  dimension = len(self.__sample)
188  covariances.append(numpy.zeros((dimension, dimension)) + random.random() / 10.0)
189
190  return covariances
191
192
193  def __initialize_random(self):
194  initial_means = []
195
196  for _ in range(self.__amount):
197  mean = self.__sample[ random.randint(0, len(self.__sample)) - 1 ]
198  attempts = 0
199  while (mean in initial_means) and (attempts < ema_initializer.__MAX_GENERATION_ATTEMPTS):
200  mean = self.__sample[ random.randint(0, len(self.__sample)) - 1 ]
201  attempts += 1
202
203  if attempts == ema_initializer.__MAX_GENERATION_ATTEMPTS:
204  mean = [ value + (random.random() - 0.5) * value * 0.2 for value in mean ]
205
206  initial_means.append(mean)
207
208  initial_clusters = self.__calculate_initial_clusters(initial_means)
209  initial_covariance = self.__calculate_initial_covariances(initial_clusters)
210
211  return initial_means, initial_covariance
212
213
214  def __initialize_kmeans(self):
215  initial_centers = kmeans_plusplus_initializer(self.__sample, self.__amount).initialize()
216  kmeans_instance = kmeans(self.__sample, initial_centers, ccore = True)
217  kmeans_instance.process()
218
219  means = kmeans_instance.get_centers()
220
221  covariances = []
222  initial_clusters = kmeans_instance.get_clusters()
223  for initial_cluster in initial_clusters:
224  if len(initial_cluster) > 1:
225  cluster_sample = [ self.__sample[index_point] for index_point in initial_cluster ]
226  covariances.append(numpy.cov(cluster_sample, rowvar = False))
227  else:
228  dimension = len(self.__sample)
229  covariances.append(numpy.zeros((dimension, dimension)) + random.random() / 10.0)
230
231  return means, covariances
232
233
234
class ema_observer:
    """!
    @brief Observer of EM algorithm for collecting algorithm state on each step.
    @details It can be used to obtain whole picture about clustering process of EM algorithm. Allocated clusters,
              means and covariances are stored in observer on each step. Here an example of usage:

    @code
        from pyclustering.cluster.ema import ema, ema_observer
        from pyclustering.utils import read_sample
        from pyclustering.samples.definitions import SIMPLE_SAMPLES

        # Read data from text file.
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)

        # Create EM observer.
        observer = ema_observer()

        # Create EM algorithm to allocate four clusters and pass observer to it.
        ema_instance = ema(sample, 4, observer=observer)

        # Run clustering process.
        ema_instance.process()

        # Print amount of steps that were done by the algorithm.
        print("EMA steps:", observer.get_iterations())

        # Print evolution of means and covariances.
        print("Means evolution:", observer.get_evolution_means())
        print("Covariances evolution:", observer.get_evolution_covariances())

        # Print evolution of clusters.
        print("Clusters evolution:", observer.get_evolution_clusters())

        # Print final clusters.
        print("Allocated clusters:", observer.get_evolution_clusters()[-1])
    @endcode

    """
273  def __init__(self):
274  """!
275  @brief Initializes EM observer.
276
277  """
278  self.__means_evolution = []
279  self.__covariances_evolution = []
280  self.__clusters_evolution = []
281
282
283  def __len__(self):
284  """!
285  @return (uint) Amount of iterations that were done by the EM algorithm.
286
287  """
288  return len(self.__means_evolution)
289
290
291  def get_iterations(self):
292  """!
293  @return (uint) Amount of iterations that were done by the EM algorithm.
294
295  """
296  return len(self.__means_evolution)
297
298
300  """!
301  @return (list) Mean of each cluster on each step of clustering.
302
303  """
304  return self.__means_evolution
305
306
308  """!
309  @return (list) Covariance matrix (or variance in case of one-dimensional data) of each cluster on each step of clustering.
310
311  """
312  return self.__covariances_evolution
313
314
316  """!
317  @return (list) Allocated clusters on each step of clustering.
318
319  """
320  return self.__clusters_evolution
321
322
323  def notify(self, means, covariances, clusters):
324  """!
325  @brief This method is used by the algorithm to notify observer about changes where the algorithm
326  should provide new values: means, covariances and allocated clusters.
327
328  @param[in] means (list): Mean of each cluster on currect step.
329  @param[in] covariances (list): Covariances of each cluster on current step.
330  @param[in] clusters (list): Allocated cluster on current step.
331
332  """
333  self.__means_evolution.append(means)
334  self.__covariances_evolution.append(covariances)
335  self.__clusters_evolution.append(clusters)
336
337
338
class ema_visualizer:
    """!
    @brief Visualizer of EM algorithm's results.
    @details Provides services for visualization of particular features of the algorithm, for example,
              in case of two-dimensional dataset it shows covariance ellipses.

    """
346
347  @staticmethod
348  def show_clusters(clusters, sample, covariances, means, figure = None, display = True):
349  """!
350  @brief Draws clusters and in case of two-dimensional dataset draws their ellipses.
351
352  @param[in] clusters (list): Clusters that were allocated by the algorithm.
353  @param[in] sample (list): Dataset that were used for clustering.
354  @param[in] covariances (list): Covariances of the clusters.
355  @param[in] means (list): Means of the clusters.
356  @param[in] figure (figure): If 'None' then new is figure is creater, otherwise specified figure is used
357  for visualization.
358  @param[in] display (bool): If 'True' then figure will be shown by the method, otherwise it should be
359  shown manually using matplotlib function 'plt.show()'.
360
361  @return (figure) Figure where clusters were drawn.
362
363  """
364
365  visualizer = cluster_visualizer()
366  visualizer.append_clusters(clusters, sample)
367
368  if figure is None:
369  figure = visualizer.show(display = False)
370  else:
371  visualizer.show(figure = figure, display = False)
372
373  if len(sample) == 2:
374  ema_visualizer.__draw_ellipses(figure, visualizer, clusters, covariances, means)
375
376  if display is True:
377  plt.show()
378
379  return figure
380
381
    @staticmethod
    def animate_cluster_allocation(data, observer, animation_velocity = 75, movie_fps = 1, save_movie = None):
        """!
        @brief Animates clustering process that is performed by EM algorithm.
        @details Each frame corresponds to one step stored in the observer. The animation is either shown
                  in a window or, when 'save_movie' is specified, rendered to a file using 'ffmpeg' writer.

        @param[in] data (list): Dataset that is used for clustering.
        @param[in] observer (ema_observer): EM observer that was used for collection information about clustering process.
        @param[in] animation_velocity (uint): Interval between frames in milliseconds (for run-time animation only).
        @param[in] movie_fps (uint): Defines frames per second (for rendering movie only).
        @param[in] save_movie (string): If it is specified then animation will be stored to file that is specified in this parameter.

        """

        figure = plt.figure()

        def init_frame():
            # The initial frame is the first step stored in the observer.
            return frame_generation(0)

        def frame_generation(index_iteration):
            # The figure is redrawn from scratch for every frame.
            figure.clf()

            figure.suptitle("EM algorithm (iteration: " + str(index_iteration) +")", fontsize = 18, fontweight = 'bold')

            clusters = observer.get_evolution_clusters()[index_iteration]
            covariances = observer.get_evolution_covariances()[index_iteration]
            means = observer.get_evolution_means()[index_iteration]

            # Draw into the shared figure without showing it - displaying is controlled below.
            ema_visualizer.show_clusters(clusters, data, covariances, means, figure, False)

            return [ figure.gca() ]

        # Amount of frames is equal to amount of steps collected by the observer.
        iterations = len(observer)
        cluster_animation = animation.FuncAnimation(figure, frame_generation, iterations, interval = animation_velocity, init_func = init_frame, repeat_delay = 5000)

        if save_movie is not None:
            cluster_animation.save(save_movie, writer = 'ffmpeg', fps = movie_fps, bitrate = 1500)
        else:
            plt.show()
421
422
423  @staticmethod
424  def __draw_ellipses(figure, visualizer, clusters, covariances, means):
425  ax = figure.get_axes()
426
427  for index in range(len(clusters)):
428  angle, width, height = calculate_ellipse_description(covariances[index])
429  color = visualizer.get_cluster_color(index, 0)
430
431  ema_visualizer.__draw_ellipse(ax, means[index], means[index], angle, width, height, color)
432
433
434  @staticmethod
435  def __draw_ellipse(ax, x, y, angle, width, height, color):
436  if (width > 0.0) and (height > 0.0):
437  ax.plot(x, y, color=color, marker='x', markersize=6)
438  ellipse = patches.Ellipse((x, y), width, height, alpha=0.2, angle=-angle, linewidth=2, fill=True, zorder=2, color=color)
440
441
442
class ema:
    """!
    @brief Expectation-Maximization clustering algorithm for Gaussian Mixture Model (GMM).
    @details The algorithm provides only clustering services (unsupervised learning).
              Here an example of data clustering process:
    @code
        from pyclustering.cluster.ema import ema, ema_visualizer
        from pyclustering.utils import read_sample
        from pyclustering.samples.definitions import FCPS_SAMPLES

        # Read data from text file.
        sample = read_sample(FCPS_SAMPLES.SAMPLE_LSUN)

        # Create EM algorithm to allocate three clusters.
        ema_instance = ema(sample, 3)

        # Run clustering process.
        ema_instance.process()

        # Get clustering results.
        clusters = ema_instance.get_clusters()
        covariances = ema_instance.get_covariances()
        means = ema_instance.get_centers()

        # Visualize obtained clustering results.
        ema_visualizer.show_clusters(clusters, sample, covariances, means)
    @endcode

    Here is clustering results of the Expectation-Maximization clustering algorithm where popular sample 'OldFaithful' was used.
    Initial random means and covariances were used in the example. The first step is presented on the left side of the figure and
    final result (the last step) is on the right side:
    @image html ema_old_faithful_clustering.png

    @see ema_visualizer
    @see ema_observer

    """
480  def __init__(self, data, amount_clusters, means = None, variances = None, observer = None, tolerance = 0.00001, iterations = 100):
481  """!
482  @brief Initializes Expectation-Maximization algorithm for cluster analysis.
483
484  @param[in] data (list): Dataset that should be analysed and where each point (object) is represented by the list of coordinates.
485  @param[in] amount_clusters (uint): Amount of clusters that should be allocated.
486  @param[in] means (list): Initial means of clusters (amount of means should be equal to amount of clusters for allocation).
487  If this parameter is 'None' then K-Means algorithm with K-Means++ method will be used for initialization by default.
488  @param[in] variances (list): Initial cluster variances (or covariances in case of multi-dimensional data). Amount of
489  covariances should be equal to amount of clusters that should be allocated. If this parameter is 'None' then
490  K-Means algorithm with K-Means++ method will be used for initialization by default.
491  @param[in] observer (ema_observer): Observer for gathering information about clustering process.
492  @param[in] tolerance (float): Defines stop condition of the algorithm (when difference between current and
493  previous log-likelihood estimation is less then 'tolerance' then clustering is over).
494  @param[in] iterations (uint): Additional stop condition parameter that defines maximum number of steps that can be
495  performed by the algorithm during clustering process.
496
497  """
498
499  self.__data = numpy.array(data)
500  self.__amount_clusters = amount_clusters
501  self.__tolerance = tolerance
502  self.__iterations = iterations
503  self.__observer = observer
504
505  self.__means = means
506  self.__variances = variances
507
508  if (means is None) or (variances is None):
509  self.__means, self.__variances = ema_initializer(data, amount_clusters).initialize(ema_init_type.KMEANS_INITIALIZATION)
510
511  if len(self.__means) != amount_clusters:
512  self.__amount_clusters = len(self.__means)
513
514  self.__rc = [ [0.0] * len(self.__data) for _ in range(amount_clusters) ]
515  self.__pic = [1.0] * amount_clusters
516  self.__clusters = []
517  self.__gaussians = [ [] for _ in range(amount_clusters) ]
518  self.__stop = False
519
520
521  def process(self):
522  """!
523  @brief Run clustering process of the algorithm.
524  @details This method should be called before call 'get_clusters()'.
525
526  """
527
528  previous_likelihood = -200000
529  current_likelihood = -100000
530
531  current_iteration = 0
532  while(self.__stop is False) and (abs(previous_likelihood - current_likelihood) > self.__tolerance) and (current_iteration < self.__iterations):
533  self.__expectation_step()
534  self.__maximization_step()
535
536  current_iteration += 1
537
538  self.__extract_clusters()
539  self.__notify()
540
541  previous_likelihood = current_likelihood
542  current_likelihood = self.__log_likelihood()
543  self.__stop = self.__get_stop_condition()
544
546
547
548  def get_clusters(self):
549  """!
550  @return (list) Allocated clusters where each cluster is represented by list of indexes of points from dataset,
551  for example, two cluster may have following representation [[0, 1, 4], [2, 3, 5, 6]].
552
553  """
554  return self.__clusters
555
556
557  def get_centers(self):
558  """!
559  @return (list) Corresponding centers (means) of clusters.
560
561  """
562
563  return self.__means
564
565
566  def get_covariances(self):
567  """!
568  @return (list) Corresponding variances (or covariances in case of multi-dimensional data) of clusters.
569
570  """
571
572  return self.__variances
573
574
575  def get_probabilities(self):
576  """!
577  @brief Returns 2-dimensional list with belong probability of each object from data to cluster correspondingly,
578  where that first index is for cluster and the second is for point.
579
580  @code
581  # Get belong probablities
582  probabilities = ema_instance.get_probabilities();
583
584  # Show porbability of the fifth element in the first and in the second cluster
585  index_point = 5;
586  print("Probability in the first cluster:", probabilities[index_point]);
587  print("Probability in the first cluster:", probabilities[index_point]);
588  @endcode
589
590  @return (list) 2-dimensional list with belong probability of each object from data to cluster.
591
592  """
593
594  return self.__rc
595
596
597  def __erase_empty_clusters(self):
598  clusters, means, variances, pic, gaussians, rc = [], [], [], [], [], []
599
600  for index_cluster in range(len(self.__clusters)):
601  if len(self.__clusters[index_cluster]) > 0:
602  clusters.append(self.__clusters[index_cluster])
603  means.append(self.__means[index_cluster])
604  variances.append(self.__variances[index_cluster])
605  pic.append(self.__pic[index_cluster])
606  gaussians.append(self.__gaussians[index_cluster])
607  rc.append(self.__rc[index_cluster])
608
609  if len(self.__clusters) != len(clusters):
610  self.__clusters, self.__means, self.__variances, self.__pic = clusters, means, variances, pic
611  self.__gaussians, self.__rc = gaussians, rc
612  self.__amount_clusters = len(self.__clusters)
613
614
615  def __notify(self):
616  if self.__observer is not None:
617  self.__observer.notify(self.__means, self.__variances, self.__clusters)
618
619
620  def __extract_clusters(self):
621  self.__clusters = [ [] for _ in range(self.__amount_clusters) ]
622  for index_point in range(len(self.__data)):
623  candidates = []
624  for index_cluster in range(self.__amount_clusters):
625  candidates.append((index_cluster, self.__rc[index_cluster][index_point]))
626
627  index_winner = max(candidates, key = lambda candidate : candidate)
628  self.__clusters[index_winner].append(index_point)
629
631
632
633  def __log_likelihood(self):
634  likelihood = 0.0
635
636  for index_point in range(len(self.__data)):
637  particle = 0.0
638  for index_cluster in range(self.__amount_clusters):
639  particle += self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point]
640
641  if particle > 0.0:
642  likelihood += numpy.log(particle)
643
644  return likelihood
645
646
647  def __probabilities(self, index_cluster, index_point):
648  divider = 0.0
649  for i in range(self.__amount_clusters):
650  divider += self.__pic[i] * self.__gaussians[i][index_point]
651
652  if (divider != 0.0) and (divider != float('inf')):
653  return self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point] / divider
654
655  return 1.0
656
657
658  def __expectation_step(self):
659  self.__gaussians = [ [] for _ in range(self.__amount_clusters) ]
660  for index in range(self.__amount_clusters):
661  self.__gaussians[index] = gaussian(self.__data, self.__means[index], self.__variances[index])
662
663  self.__rc = [ [0.0] * len(self.__data) for _ in range(self.__amount_clusters) ]
664  for index_cluster in range(self.__amount_clusters):
665  for index_point in range(len(self.__data)):
666  self.__rc[index_cluster][index_point] = self.__probabilities(index_cluster, index_point)
667
668
669  def __maximization_step(self):
670  self.__pic = []
671  self.__means = []
672  self.__variances = []
673
674  amount_impossible_clusters = 0
675
676  for index_cluster in range(self.__amount_clusters):
677  mc = numpy.sum(self.__rc[index_cluster])
678
679  if mc == 0.0:
680  amount_impossible_clusters += 1
681  continue
682
683  self.__pic.append( mc / len(self.__data) )
684  self.__means.append( self.__update_mean(self.__rc[index_cluster], mc) )
685  self.__variances.append( self.__update_covariance(self.__means[-1], self.__rc[index_cluster], mc) )
686
687  self.__amount_clusters -= amount_impossible_clusters
688
689
690  def __get_stop_condition(self):
691  for covariance in self.__variances:
692  if numpy.linalg.norm(covariance) == 0.0:
693  return True
694
695  return False
696
697
698  def __update_covariance(self, means, rc, mc):
699  covariance = 0.0
700  for index_point in range(len(self.__data)):
701  deviation = numpy.array( [ self.__data[index_point] - means ])
702  covariance += rc[index_point] * deviation.T.dot(deviation)
703
704  covariance = covariance / mc
705  return covariance
706
707
708  def __update_mean(self, rc, mc):
709  mean = 0.0
710  for index_point in range(len(self.__data)):
711  mean += rc[index_point] * self.__data[index_point]
712
713  mean = mean / mc
714  return mean
715
716
717  def __normalize_probabilities(self):
718  for index_point in range(len(self.__data)):
719  probability = 0.0
720  for index_cluster in range(len(self.__clusters)):
721  probability += self.__rc[index_cluster][index_point]
722
723  if abs(probability - 1.0) > 0.000001:
724  self.__normalize_probability(index_point, probability)
725
726
727  def __normalize_probability(self, index_point, probability):
728  if probability == 0.0:
729  return
730
731  normalization = 1.0 / probability
732
733  for index_cluster in range(len(self.__clusters)):
734  self.__rc[index_cluster][index_point] *= normalization
Common visualizer of clusters on 1D, 2D or 3D surface.
Definition: __init__.py:359
pyclustering module for cluster analysis.
Definition: __init__.py:1
def __init__(self, sample, amount)
Constructs EM initializer.
Definition: ema.py:125
Cluster analysis algorithm: K-Means.
Definition: kmeans.py:1
def __update_covariance(self, means, rc, mc)
Definition: ema.py:698
def __probabilities(self, index_cluster, index_point)
Definition: ema.py:647
Utils that are used by modules of pyclustering.
Definition: __init__.py:1
def animate_cluster_allocation(data, observer, animation_velocity=75, movie_fps=1, save_movie=None)
Animates clustering process that is performed by EM algorithm.
Definition: ema.py:383
def __extract_clusters(self)
Definition: ema.py:620
K-Means++ is an algorithm for choosing the initial centers for algorithms like K-Means or X-Means...
Expectation-Maximization clustering algorithm for Gaussian Mixture Model (GMM).
Definition: ema.py:443
def get_covariances(self)
Definition: ema.py:566
def __get_stop_condition(self)
Definition: ema.py:690
def gaussian(data, mean, covariance)
Calculates gaussian for dataset using specified mean (mathematical expectation) and variance or covar...
Definition: ema.py:48
def notify(self, means, covariances, clusters)
This method is used by the algorithm to notify observer about changes where the algorithm should prov...
Definition: ema.py:323
def __expectation_step(self)
Definition: ema.py:658
Observer of EM algorithm for collecting algorithm state on each step.
Definition: ema.py:235
Class represents K-Means clustering algorithm.
Definition: kmeans.py:272
def __calculate_initial_clusters(self, centers)
Calculate Euclidean distance to each point from the each cluster.
Definition: ema.py:156
def initialize(self, init_type=ema_init_type.KMEANS_INITIALIZATION)
Calculates initial parameters for EM algorithm: means and covariances using specified strategy...
Definition: ema.py:137
def __init__(self)
Initializes EM observer.
Definition: ema.py:273
def show_clusters(clusters, sample, covariances, means, figure=None, display=True)
Draws clusters and in case of two-dimensional dataset draws their ellipses.
Definition: ema.py:348
def __normalize_probabilities(self)
Definition: ema.py:717
Provides services for preparing initial means and covariances for Expectation-Maximization algorithm...
Definition: ema.py:102
def __update_mean(self, rc, mc)
Definition: ema.py:708
def __maximization_step(self)
Definition: ema.py:669
Collection of center initializers for algorithm that uses initial centers, for example, for K-Means or X-Means.
def process(self)
Run clustering process of the algorithm.
Definition: ema.py:521
def get_clusters(self)
Definition: ema.py:548
def get_probabilities(self)
Returns 2-dimensional list with belong probability of each object from data to cluster correspondingl...
Definition: ema.py:575
Visualizer of EM algorithm's results.
Definition: ema.py:339
def __calculate_initial_covariances(self, initial_clusters)
Definition: ema.py:180
Enumeration of initialization types for Expectation-Maximization algorithm.
Definition: ema.py:84
def __log_likelihood(self)
Definition: ema.py:633
def __normalize_probability(self, index_point, probability)
Definition: ema.py:727
def __init__(self, data, amount_clusters, means=None, variances=None, observer=None, tolerance=0.00001, iterations=100)
Initializes Expectation-Maximization algorithm for cluster analysis.
Definition: ema.py:480
def __erase_empty_clusters(self)
Definition: ema.py:597