pyclustering 0.10.1 — pyclustering is a Python, C++ data mining library.
ema.py
1 """!
2
3 @brief Cluster analysis algorithm: Expectation-Maximization Algorithm for Gaussian Mixture Model.
4 @details Implementation based on paper @cite article::ema::1.
5
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2020
9
10 """
11
12
13 import numpy
14 import random
15
16 from pyclustering.cluster import cluster_visualizer
17 from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
18 from pyclustering.cluster.kmeans import kmeans
19
20 from pyclustering.utils import pi, calculate_ellipse_description, euclidean_distance_square
21
22 from enum import IntEnum
23
24 import matplotlib.pyplot as plt
25 import matplotlib.animation as animation
26 from matplotlib import patches
27
28
def gaussian(data, mean, covariance):
    """!
    @brief Calculates gaussian for dataset using specified mean (mathematical expectation) and variance or
            covariance in case multi-dimensional data.

    @param[in] data (list): Data that is used for gaussian calculation.
    @param[in] mean (float|numpy.array): Mathematical expectation used for calculation.
    @param[in] covariance (float|numpy.array): Variance or covariance matrix for calculation.

    @return (list) Value of gaussian function for each point in dataset.

    """
    # Bug fix: the dimension is the length of a single point, not the amount of points
    # in the dataset (was 'len(data)').
    dimension = float(len(data[0]))

    if dimension != 1.0:
        inv_variance = numpy.linalg.pinv(covariance)
    else:
        inv_variance = 1.0 / covariance

    # numpy.pi is used instead of the library-local truncated 'pi' constant for accuracy.
    # NOTE(review): a textbook multivariate gaussian normalizes by sqrt(det(covariance));
    # 'norm' is kept here to preserve the library's established behavior — TODO confirm upstream intent.
    divider = (numpy.pi * 2.0) ** (dimension / 2.0) * numpy.sqrt(numpy.linalg.norm(covariance))
    if divider != 0.0:
        right_const = 1.0 / divider
    else:
        right_const = float('inf')

    result = []

    for point in data:
        mean_delta = point - mean
        point_gaussian = right_const * numpy.exp(-0.5 * mean_delta.dot(inv_variance).dot(numpy.transpose(mean_delta)))
        result.append(point_gaussian)

    return result
62
63
64
class ema_init_type(IntEnum):
    """!
    @brief Enumeration of initialization types for Expectation-Maximization algorithm.

    """

    ## Initial means are sampled randomly from the input data.
    RANDOM_INITIALIZATION = 0

    ## Initial means and covariances are produced by K-Means clustering with K-Means++ seeding.
    KMEANS_INITIALIZATION = 1
80
81
82
class ema_initializer:
    """!
    @brief Provides services for preparing initial means and covariances for Expectation-Maximization algorithm.
    @details Initialization strategy is defined by enumerator 'ema_init_type': random initialization and
              kmeans with kmeans++ initialization. Here an example of initialization using kmeans strategy:

    @code
        from pyclustering.samples.definitions import FAMOUS_SAMPLES
        from pyclustering.cluster.ema import ema_initializer
        from pyclustering.utils import read_sample

        sample = read_sample(FAMOUS_SAMPLES.SAMPLE_OLD_FAITHFUL)
        amount_clusters = 2

        initial_means, initial_covariance = ema_initializer(sample, amount_clusters).initialize()
        print(initial_means)
        print(initial_covariance)
    @endcode

    """

    ## Maximum amount of attempts to generate a unique random mean before perturbing a duplicate.
    __MAX_GENERATION_ATTEMPTS = 10

    def __init__(self, sample, amount):
        """!
        @brief Constructs EM initializer.

        @param[in] sample (list): Data that will be used by the EM algorithm.
        @param[in] amount (uint): Amount of clusters that should be allocated by the EM algorithm.

        """
        self.__sample = sample
        self.__amount = amount

    def initialize(self, init_type=ema_init_type.KMEANS_INITIALIZATION):
        """!
        @brief Calculates initial parameters for EM algorithm: means and covariances using
                specified strategy.

        @param[in] init_type (ema_init_type): Strategy for initialization.

        @return (float|list, float|numpy.array) Initial means and variance (covariance matrix in case multi-dimensional data).

        """
        if init_type == ema_init_type.KMEANS_INITIALIZATION:
            return self.__initialize_kmeans()

        elif init_type == ema_init_type.RANDOM_INITIALIZATION:
            return self.__initialize_random()

        raise NameError("Unknown type of EM algorithm initialization is specified.")

    def __calculate_initial_clusters(self, centers):
        """!
        @brief Calculate Euclidean distance to each point from the each cluster.
                Nearest points are captured by according clusters and as a result clusters are updated.

        @return (list) updated clusters as list of clusters. Each cluster contains indexes of objects from data.

        """
        clusters = [[] for _ in range(len(centers))]
        for index_point in range(len(self.__sample)):
            index_optim, dist_optim = -1, 0.0

            for index in range(len(centers)):
                dist = euclidean_distance_square(self.__sample[index_point], centers[index])

                # 'index == 0' seeds the optimum with the first center unconditionally.
                if (dist < dist_optim) or (index == 0):
                    index_optim, dist_optim = index, dist

            clusters[index_optim].append(index_point)

        return clusters

    def __calculate_initial_covariances(self, initial_clusters):
        """!
        @brief Calculates covariance matrix (or variance) for each initial cluster.
        @details A singleton cluster gets a small random diagonal-free matrix since a covariance
                  cannot be estimated from one point.

        @param[in] initial_clusters (list): Clusters represented by lists of point indexes.

        @return (list) Covariance matrix (or variance) for each cluster.

        """
        covariances = []
        for initial_cluster in initial_clusters:
            if len(initial_cluster) > 1:
                cluster_sample = [self.__sample[index_point] for index_point in initial_cluster]
                covariances.append(numpy.cov(cluster_sample, rowvar=False))
            else:
                # Bug fix: dimension is the length of a single data point,
                # not the amount of points in the sample (was 'len(self.__sample)').
                dimension = len(self.__sample[0])
                covariances.append(numpy.zeros((dimension, dimension)) + random.random() / 10.0)

        return covariances

    def __initialize_random(self):
        """!
        @brief Prepares initial means (random points of the sample) and corresponding covariances.

        @return (list, list) Initial means and covariances.

        """
        initial_means = []

        for _ in range(self.__amount):
            # Bug fix: former 'random.randint(0, len(sample)) - 1' could yield index -1;
            # the upper bound itself must be len(sample) - 1 (randint is inclusive).
            mean = self.__sample[random.randint(0, len(self.__sample) - 1)]
            attempts = 0
            while (mean in initial_means) and (attempts < ema_initializer.__MAX_GENERATION_ATTEMPTS):
                mean = self.__sample[random.randint(0, len(self.__sample) - 1)]
                attempts += 1

            if attempts == ema_initializer.__MAX_GENERATION_ATTEMPTS:
                # Could not find a unique point - perturb the duplicate by up to +-10% per coordinate.
                mean = [value + (random.random() - 0.5) * value * 0.2 for value in mean]

            initial_means.append(mean)

        initial_clusters = self.__calculate_initial_clusters(initial_means)
        initial_covariance = self.__calculate_initial_covariances(initial_clusters)

        return initial_means, initial_covariance

    def __initialize_kmeans(self):
        """!
        @brief Prepares initial means and covariances using K-Means with K-Means++ seeding.

        @return (list, list) Initial means and covariances.

        """
        initial_centers = kmeans_plusplus_initializer(self.__sample, self.__amount).initialize()
        kmeans_instance = kmeans(self.__sample, initial_centers, ccore=True)
        kmeans_instance.process()

        means = kmeans_instance.get_centers()

        # Consistency: reuse the shared covariance computation instead of duplicating it here.
        covariances = self.__calculate_initial_covariances(kmeans_instance.get_clusters())

        return means, covariances
213
214
215
class ema_observer:
    """!
    @brief Observer of EM algorithm for collecting algorithm state on each step.
    @details It can be used to obtain whole picture about clustering process of EM algorithm. Allocated clusters,
              means and covariances are stored in observer on each step. Here an example of usage:

    @code
        from pyclustering.cluster.ema import ema, ema_observer
        from pyclustering.samples.definitions import SIMPLE_SAMPLES
        from pyclustering.utils import read_sample

        # Read data from text file.
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)

        # Create EM observer.
        observer = ema_observer()

        # Create EM algorithm to allocated four clusters and pass observer to it.
        ema_instance = ema(sample, 4, observer=observer)

        # Run clustering process.
        ema_instance.process()

        # Print amount of steps that were done by the algorithm.
        print("EMA steps:", observer.get_iterations())

        # Print evolution of means and covariances.
        print("Means evolution:", observer.get_evolution_means())
        print("Covariances evolution:", observer.get_evolution_covariances())

        # Print evolution of clusters.
        print("Clusters evolution:", observer.get_evolution_clusters())

        # Print final clusters.
        print("Allocated clusters:", observer.get_evolution_clusters()[-1])
    @endcode

    """
    def __init__(self):
        """!
        @brief Initializes EM observer.

        """
        # One snapshot is appended to each list per algorithm iteration (see notify()).
        self.__means_evolution = []
        self.__covariances_evolution = []
        self.__clusters_evolution = []

    def __len__(self):
        """!
        @return (uint) Amount of iterations that were done by the EM algorithm.

        """
        return len(self.__means_evolution)

    def get_iterations(self):
        """!
        @return (uint) Amount of iterations that were done by the EM algorithm.

        """
        return len(self.__means_evolution)

    def get_evolution_means(self):
        """!
        @return (list) Mean of each cluster on each step of clustering.

        """
        return self.__means_evolution

    def get_evolution_covariances(self):
        """!
        @return (list) Covariance matrix (or variance in case of one-dimensional data) of each cluster on each step of clustering.

        """
        return self.__covariances_evolution

    def get_evolution_clusters(self):
        """!
        @return (list) Allocated clusters on each step of clustering.

        """
        return self.__clusters_evolution

    def notify(self, means, covariances, clusters):
        """!
        @brief This method is used by the algorithm to notify observer about changes where the algorithm
                should provide new values: means, covariances and allocated clusters.

        @param[in] means (list): Mean of each cluster on current step.
        @param[in] covariances (list): Covariances of each cluster on current step.
        @param[in] clusters (list): Allocated cluster on current step.

        """
        self.__means_evolution.append(means)
        self.__covariances_evolution.append(covariances)
        self.__clusters_evolution.append(clusters)
318
319
class ema_visualizer:
    """!
    @brief Visualizer of EM algorithm's results.
    @details Provides services for visualization of particular features of the algorithm, for example,
              in case of two-dimensional dataset it shows covariance ellipses.

    """

    @staticmethod
    def show_clusters(clusters, sample, covariances, means, figure=None, display=True):
        """!
        @brief Draws clusters and in case of two-dimensional dataset draws their ellipses.

        @param[in] clusters (list): Clusters that were allocated by the algorithm.
        @param[in] sample (list): Dataset that were used for clustering.
        @param[in] covariances (list): Covariances of the clusters.
        @param[in] means (list): Means of the clusters.
        @param[in] figure (figure): If 'None' then new figure is created, otherwise specified figure is used
                    for visualization.
        @param[in] display (bool): If 'True' then figure will be shown by the method, otherwise it should be
                    shown manually using matplotlib function 'plt.show()'.

        @return (figure) Figure where clusters were drawn.

        """
        visualizer = cluster_visualizer()
        visualizer.append_clusters(clusters, sample)

        if figure is None:
            figure = visualizer.show(display=False)
        else:
            visualizer.show(figure=figure, display=False)

        # Bug fix: the check must be against the dimension of a data point, not the amount
        # of points in the dataset (was 'len(sample) == 2').
        if len(sample[0]) == 2:
            ema_visualizer.__draw_ellipses(figure, visualizer, clusters, covariances, means)

        if display is True:
            plt.show()

        return figure

    @staticmethod
    def animate_cluster_allocation(data, observer, animation_velocity=75, movie_fps=1, save_movie=None):
        """!
        @brief Animates clustering process that is performed by EM algorithm.

        @param[in] data (list): Dataset that is used for clustering.
        @param[in] observer (ema_observer): EM observer that was used for collection information about clustering process.
        @param[in] animation_velocity (uint): Interval between frames in milliseconds (for run-time animation only).
        @param[in] movie_fps (uint): Defines frames per second (for rendering movie only).
        @param[in] save_movie (string): If it is specified then animation will be stored to file that is specified in this parameter.

        """
        figure = plt.figure()

        def init_frame():
            return frame_generation(0)

        def frame_generation(index_iteration):
            # Redraw the whole figure for every iteration snapshot collected by the observer.
            figure.clf()

            figure.suptitle("EM algorithm (iteration: " + str(index_iteration) + ")", fontsize=18, fontweight='bold')

            clusters = observer.get_evolution_clusters()[index_iteration]
            covariances = observer.get_evolution_covariances()[index_iteration]
            means = observer.get_evolution_means()[index_iteration]

            ema_visualizer.show_clusters(clusters, data, covariances, means, figure, False)

            return [figure.gca()]

        iterations = len(observer)
        cluster_animation = animation.FuncAnimation(figure, frame_generation, iterations, interval=animation_velocity, init_func=init_frame, repeat_delay=5000)

        if save_movie is not None:
            cluster_animation.save(save_movie, writer='ffmpeg', fps=movie_fps, bitrate=1500)
        else:
            plt.show()

    @staticmethod
    def __draw_ellipses(figure, visualizer, clusters, covariances, means):
        """!
        @brief Draws a covariance ellipse for each cluster on the first axis of the figure.

        """
        # Bug fix: get_axes() returns a list of axes - take the first one.
        ax = figure.get_axes()[0]

        for index in range(len(clusters)):
            angle, width, height = calculate_ellipse_description(covariances[index])
            color = visualizer.get_cluster_color(index, 0)

            # Bug fix: pass the x and y coordinates of the mean separately
            # (the mean itself was passed twice before).
            ema_visualizer.__draw_ellipse(ax, means[index][0], means[index][1], angle, width, height, color)

    @staticmethod
    def __draw_ellipse(ax, x, y, angle, width, height, color):
        """!
        @brief Draws a single ellipse centered at (x, y) if its axes are non-degenerate.

        """
        if (width > 0.0) and (height > 0.0):
            ax.plot(x, y, color=color, marker='x', markersize=6)
            ellipse = patches.Ellipse((x, y), width, height, alpha=0.2, angle=-angle, linewidth=2, fill=True, zorder=2, color=color)
            # Bug fix: the ellipse was created but never attached to the axis.
            ax.add_patch(ellipse)
423
class ema:
    """!
    @brief Expectation-Maximization clustering algorithm for Gaussian Mixture Model (GMM).
    @details The algorithm provides only clustering services (unsupervised learning).
              Here an example of data clustering process:
    @code
        from pyclustering.cluster.ema import ema, ema_visualizer
        from pyclustering.samples.definitions import FCPS_SAMPLES
        from pyclustering.utils import read_sample

        # Read data from text file.
        sample = read_sample(FCPS_SAMPLES.SAMPLE_LSUN)

        # Create EM algorithm to allocate three clusters.
        ema_instance = ema(sample, 3)

        # Run clustering process.
        ema_instance.process()

        # Get clustering results.
        clusters = ema_instance.get_clusters()
        covariances = ema_instance.get_covariances()
        means = ema_instance.get_centers()

        # Visualize obtained clustering results.
        ema_visualizer.show_clusters(clusters, sample, covariances, means)
    @endcode

    Here is clustering results of the Expectation-Maximization clustering algorithm where popular sample 'OldFaithful' was used.
    Initial random means and covariances were used in the example. The first step is presented on the left side of the figure and
    final result (the last step) is on the right side:
    @image html ema_old_faithful_clustering.png

    @see ema_visualizer
    @see ema_observer

    """
    def __init__(self, data, amount_clusters, means=None, variances=None, observer=None, tolerance=0.00001, iterations=100):
        """!
        @brief Initializes Expectation-Maximization algorithm for cluster analysis.

        @param[in] data (list): Dataset that should be analysed and where each point (object) is represented by the list of coordinates.
        @param[in] amount_clusters (uint): Amount of clusters that should be allocated.
        @param[in] means (list): Initial means of clusters (amount of means should be equal to amount of clusters for allocation).
                    If this parameter is 'None' then K-Means algorithm with K-Means++ method will be used for initialization by default.
        @param[in] variances (list): Initial cluster variances (or covariances in case of multi-dimensional data). Amount of
                    covariances should be equal to amount of clusters that should be allocated. If this parameter is 'None' then
                    K-Means algorithm with K-Means++ method will be used for initialization by default.
        @param[in] observer (ema_observer): Observer for gathering information about clustering process.
        @param[in] tolerance (float): Defines stop condition of the algorithm (when difference between current and
                    previous log-likelihood estimation is less then 'tolerance' then clustering is over).
        @param[in] iterations (uint): Additional stop condition parameter that defines maximum number of steps that can be
                    performed by the algorithm during clustering process.

        """
        self.__data = numpy.array(data)
        self.__amount_clusters = amount_clusters
        self.__tolerance = tolerance
        self.__iterations = iterations
        self.__observer = observer

        self.__means = means
        self.__variances = variances

        self.__verify_arguments()

        if (means is None) or (variances is None):
            self.__means, self.__variances = ema_initializer(data, amount_clusters).initialize(ema_init_type.KMEANS_INITIALIZATION)

            # The initializer may produce fewer clusters than requested.
            if len(self.__means) != amount_clusters:
                self.__amount_clusters = len(self.__means)

        # rc[k][i] - probability that point 'i' belongs to cluster 'k' (responsibilities).
        self.__rc = [[0.0] * len(self.__data) for _ in range(amount_clusters)]
        # pic[k] - mixing coefficient (weight) of cluster 'k'.
        self.__pic = [1.0] * amount_clusters
        self.__clusters = []
        self.__gaussians = [[] for _ in range(amount_clusters)]
        self.__stop = False

    def process(self):
        """!
        @brief Run clustering process of the algorithm.

        @return (ema) Returns itself (EMA instance).

        """
        previous_likelihood = -200000
        current_likelihood = -100000

        current_iteration = 0
        while (self.__stop is False) and (abs(previous_likelihood - current_likelihood) > self.__tolerance) and (current_iteration < self.__iterations):
            self.__expectation_step()
            self.__maximization_step()

            current_iteration += 1

            self.__extract_clusters()
            self.__notify()

            previous_likelihood = current_likelihood
            current_likelihood = self.__log_likelihood()
            self.__stop = self.__get_stop_condition()

        return self

    def get_clusters(self):
        """!
        @return (list) Allocated clusters where each cluster is represented by list of indexes of points from dataset,
                 for example, two cluster may have following representation [[0, 1, 4], [2, 3, 5, 6]].

        """
        return self.__clusters

    def get_centers(self):
        """!
        @return (list) Corresponding centers (means) of clusters.

        """
        return self.__means

    def get_covariances(self):
        """!
        @return (list) Corresponding variances (or covariances in case of multi-dimensional data) of clusters.

        """
        return self.__variances

    def get_probabilities(self):
        """!
        @brief Returns 2-dimensional list with belong probability of each object from data to cluster correspondingly,
                where that first index is for cluster and the second is for point.

        @code
            # Get belong probabilities.
            probabilities = ema_instance.get_probabilities()

            # Show probability of the fifth element in the first and in the second cluster.
            index_point = 5
            print("Probability in the first cluster:", probabilities[0][index_point])
            print("Probability in the second cluster:", probabilities[1][index_point])
        @endcode

        @return (list) 2-dimensional list with belong probability of each object from data to cluster.

        """
        return self.__rc

    def __erase_empty_clusters(self):
        """!
        @brief Removes clusters that have no points along with their means, variances,
                weights, gaussians and responsibilities.

        """
        clusters, means, variances, pic, gaussians, rc = [], [], [], [], [], []

        for index_cluster in range(len(self.__clusters)):
            if len(self.__clusters[index_cluster]) > 0:
                clusters.append(self.__clusters[index_cluster])
                means.append(self.__means[index_cluster])
                variances.append(self.__variances[index_cluster])
                pic.append(self.__pic[index_cluster])
                gaussians.append(self.__gaussians[index_cluster])
                rc.append(self.__rc[index_cluster])

        if len(self.__clusters) != len(clusters):
            self.__clusters, self.__means, self.__variances, self.__pic = clusters, means, variances, pic
            self.__gaussians, self.__rc = gaussians, rc
            self.__amount_clusters = len(self.__clusters)

    def __notify(self):
        """!
        @brief Passes the current state (means, variances, clusters) to the observer if one is set.

        """
        if self.__observer is not None:
            self.__observer.notify(self.__means, self.__variances, self.__clusters)

    def __extract_clusters(self):
        """!
        @brief Assigns every point to the cluster with the highest responsibility and drops empty clusters.

        """
        self.__clusters = [[] for _ in range(self.__amount_clusters)]
        for index_point in range(len(self.__data)):
            candidates = []
            for index_cluster in range(self.__amount_clusters):
                candidates.append((index_cluster, self.__rc[index_cluster][index_point]))

            # Bug fix: select by the responsibility value (candidate[1]) and take the
            # cluster index (candidate[0]); the tuple itself was used as an index before.
            index_winner = max(candidates, key=lambda candidate: candidate[1])[0]
            self.__clusters[index_winner].append(index_point)

        # Bug fix: restore the missing clean-up call - clusters that captured no points
        # must be erased, otherwise later steps operate on inconsistent state.
        self.__erase_empty_clusters()

    def __log_likelihood(self):
        """!
        @brief Calculates log-likelihood of the current mixture over the whole dataset.

        @return (float) Log-likelihood value.

        """
        likelihood = 0.0

        for index_point in range(len(self.__data)):
            particle = 0.0
            for index_cluster in range(self.__amount_clusters):
                particle += self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point]

            if particle > 0.0:
                likelihood += numpy.log(particle)

        return likelihood

    def __probabilities(self, index_cluster, index_point):
        """!
        @brief Calculates normalized responsibility of the cluster for the point.

        @return (float) Responsibility value; 1.0 when the denominator degenerates.

        """
        divider = 0.0
        for i in range(self.__amount_clusters):
            divider += self.__pic[i] * self.__gaussians[i][index_point]

        if (divider != 0.0) and (divider != float('inf')):
            return self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point] / divider

        return 1.0

    def __expectation_step(self):
        """!
        @brief E-step: evaluates gaussians for each cluster and updates responsibilities.

        """
        self.__gaussians = [[] for _ in range(self.__amount_clusters)]
        for index in range(self.__amount_clusters):
            self.__gaussians[index] = gaussian(self.__data, self.__means[index], self.__variances[index])

        self.__rc = [[0.0] * len(self.__data) for _ in range(self.__amount_clusters)]
        for index_cluster in range(self.__amount_clusters):
            for index_point in range(len(self.__data)):
                self.__rc[index_cluster][index_point] = self.__probabilities(index_cluster, index_point)

    def __maximization_step(self):
        """!
        @brief M-step: re-estimates weights, means and covariances from the responsibilities;
                clusters with zero total responsibility are discarded.

        """
        self.__pic = []
        self.__means = []
        self.__variances = []

        amount_impossible_clusters = 0

        for index_cluster in range(self.__amount_clusters):
            mc = numpy.sum(self.__rc[index_cluster])

            if mc == 0.0:
                amount_impossible_clusters += 1
                continue

            self.__pic.append(mc / len(self.__data))
            self.__means.append(self.__update_mean(self.__rc[index_cluster], mc))
            self.__variances.append(self.__update_covariance(self.__means[-1], self.__rc[index_cluster], mc))

        self.__amount_clusters -= amount_impossible_clusters

    def __get_stop_condition(self):
        """!
        @brief Checks for a degenerate covariance which makes further iterations pointless.

        @return (bool) 'True' when the algorithm should stop.

        """
        for covariance in self.__variances:
            if numpy.linalg.norm(covariance) == 0.0:
                return True

        return False

    def __update_covariance(self, means, rc, mc):
        """!
        @brief Calculates the responsibility-weighted covariance for one cluster.

        @return (numpy.array) Updated covariance matrix.

        """
        covariance = 0.0
        for index_point in range(len(self.__data)):
            deviation = numpy.array([self.__data[index_point] - means])
            covariance += rc[index_point] * deviation.T.dot(deviation)

        covariance = covariance / mc
        return covariance

    def __update_mean(self, rc, mc):
        """!
        @brief Calculates the responsibility-weighted mean for one cluster.

        @return (numpy.array) Updated mean.

        """
        mean = 0.0
        for index_point in range(len(self.__data)):
            mean += rc[index_point] * self.__data[index_point]

        mean = mean / mc
        return mean

    def __normalize_probabilities(self):
        """!
        @brief Re-normalizes responsibilities so that each point's probabilities sum to 1.0.

        """
        for index_point in range(len(self.__data)):
            probability = 0.0
            for index_cluster in range(len(self.__clusters)):
                probability += self.__rc[index_cluster][index_point]

            if abs(probability - 1.0) > 0.000001:
                self.__normalize_probability(index_point, probability)

    def __normalize_probability(self, index_point, probability):
        """!
        @brief Scales responsibilities of one point so they sum to 1.0; no-op for zero total.

        """
        if probability == 0.0:
            return

        normalization = 1.0 / probability

        for index_cluster in range(len(self.__clusters)):
            self.__rc[index_cluster][index_point] *= normalization

    def __verify_arguments(self):
        """!
        @brief Verify input parameters for the algorithm and throw exception in case of incorrectness.

        """
        if len(self.__data) == 0:
            raise ValueError("Input data is empty (size: '%d')." % len(self.__data))

        if self.__amount_clusters < 1:
            raise ValueError("Amount of clusters (current value '%d') should be greater or equal to 1." %
                             self.__amount_clusters)
pyclustering.cluster.ema.ema.__tolerance
__tolerance
Definition: ema.py:482
pyclustering.cluster.center_initializer.kmeans_plusplus_initializer
K-Means++ is an algorithm for choosing the initial centers for algorithms like K-Means or X-Means.
Definition: center_initializer.py:95
pyclustering.cluster.ema.gaussian
def gaussian(data, mean, covariance)
Calculates gaussian for dataset using specified mean (mathematical expectation) and variance or covar...
Definition: ema.py:29
pyclustering.cluster.ema.ema.get_covariances
def get_covariances(self)
Definition: ema.py:551
pyclustering.cluster.ema.ema.__means
__means
Definition: ema.py:486
pyclustering.cluster.ema.ema.__stop
__stop
Definition: ema.py:501
pyclustering.cluster.ema.ema.__log_likelihood
def __log_likelihood(self)
Definition: ema.py:618
pyclustering.cluster.ema.ema.__probabilities
def __probabilities(self, index_cluster, index_point)
Definition: ema.py:632
pyclustering.cluster.ema.ema.__pic
__pic
Definition: ema.py:498
pyclustering.cluster.kmeans.kmeans
Class implements K-Means clustering algorithm.
Definition: kmeans.py:253
pyclustering.cluster.ema.ema.__gaussians
__gaussians
Definition: ema.py:500
pyclustering.cluster.ema.ema.get_centers
def get_centers(self)
Definition: ema.py:542
pyclustering.cluster.cluster_visualizer
Common visualizer of clusters on 1D, 2D or 3D surface.
Definition: __init__.py:370
pyclustering.cluster.ema.ema_initializer.__initialize_random
def __initialize_random(self)
Definition: ema.py:174
pyclustering.cluster.ema.ema_observer
Observer of EM algorithm for collecting algorithm state on each step.
Definition: ema.py:216
pyclustering.cluster.ema.ema_initializer.__calculate_initial_clusters
def __calculate_initial_clusters(self, centers)
Calculate Euclidean distance to each point from the each cluster.
Definition: ema.py:137
pyclustering.cluster.ema.ema_observer.__covariances_evolution
__covariances_evolution
Definition: ema.py:260
pyclustering.cluster.ema.ema.__iterations
__iterations
Definition: ema.py:483
pyclustering.cluster.center_initializer
Collection of center initializers for algorithm that uses initial centers, for example,...
Definition: center_initializer.py:1
pyclustering.cluster.ema.ema.__maximization_step
def __maximization_step(self)
Definition: ema.py:654
pyclustering.cluster.ema.ema_initializer.__init__
def __init__(self, sample, amount)
Constructs EM initializer.
Definition: ema.py:106
pyclustering.cluster.ema.ema.get_clusters
def get_clusters(self)
Definition: ema.py:533
pyclustering.cluster.ema.ema.__update_mean
def __update_mean(self, rc, mc)
Definition: ema.py:693
pyclustering.cluster.ema.ema_visualizer.animate_cluster_allocation
def animate_cluster_allocation(data, observer, animation_velocity=75, movie_fps=1, save_movie=None)
Animates clustering process that is performed by EM algorithm.
Definition: ema.py:364
pyclustering.cluster.ema.ema_initializer.initialize
def initialize(self, init_type=ema_init_type.KMEANS_INITIALIZATION)
Calculates initial parameters for EM algorithm: means and covariances using specified strategy.
Definition: ema.py:118
pyclustering.cluster.ema.ema.__update_covariance
def __update_covariance(self, means, rc, mc)
Definition: ema.py:683
pyclustering.cluster.ema.ema.__observer
__observer
Definition: ema.py:484
pyclustering.cluster.ema.ema_initializer.__sample
__sample
Definition: ema.py:114
pyclustering.cluster.ema.ema_observer.get_evolution_covariances
def get_evolution_covariances(self)
Definition: ema.py:288
pyclustering.cluster.ema.ema.__notify
def __notify(self)
Definition: ema.py:600
pyclustering.cluster.ema.ema_initializer
Provides services for preparing initial means and covariances for Expectation-Maximization algorithm.
Definition: ema.py:83
pyclustering.cluster.ema.ema_observer.get_evolution_means
def get_evolution_means(self)
Definition: ema.py:280
pyclustering.cluster.ema.ema_observer.notify
def notify(self, means, covariances, clusters)
This method is used by the algorithm to notify observer about changes where the algorithm should prov...
Definition: ema.py:304
pyclustering.cluster
pyclustering module for cluster analysis.
Definition: __init__.py:1
pyclustering.cluster.ema.ema.__verify_arguments
def __verify_arguments(self)
Verify input parameters for the algorithm and throw exception in case of incorrectness.
Definition: ema.py:722
pyclustering.cluster.ema.ema.__normalize_probability
def __normalize_probability(self, index_point, probability)
Definition: ema.py:712
pyclustering.cluster.ema.ema_initializer.__amount
__amount
Definition: ema.py:115
pyclustering.cluster.ema.ema.__expectation_step
def __expectation_step(self)
Definition: ema.py:643
pyclustering.cluster.ema.ema.process
def process(self)
Run clustering process of the algorithm.
Definition: ema.py:504
pyclustering.cluster.ema.ema_init_type
Enumeration of initialization types for Expectation-Maximization algorithm.
Definition: ema.py:65
pyclustering.cluster.ema.ema.__normalize_probabilities
def __normalize_probabilities(self)
Definition: ema.py:702
pyclustering.cluster.kmeans
The module contains K-Means algorithm and other related services.
Definition: kmeans.py:1
pyclustering.cluster.ema.ema_visualizer.show_clusters
def show_clusters(clusters, sample, covariances, means, figure=None, display=True)
Draws clusters and in case of two-dimensional dataset draws their ellipses.
Definition: ema.py:329
pyclustering.cluster.ema.ema_observer.get_evolution_clusters
def get_evolution_clusters(self)
Definition: ema.py:296
pyclustering.cluster.ema.ema.__get_stop_condition
def __get_stop_condition(self)
Definition: ema.py:675
pyclustering.cluster.ema.ema.__init__
def __init__(self, data, amount_clusters, means=None, variances=None, observer=None, tolerance=0.00001, iterations=100)
Initializes Expectation-Maximization algorithm for cluster analysis.
Definition: ema.py:461
pyclustering.cluster.ema.ema_observer.__init__
def __init__(self)
Initializes EM observer.
Definition: ema.py:254
pyclustering.cluster.ema.ema
Expectation-Maximization clustering algorithm for Gaussian Mixture Model (GMM).
Definition: ema.py:424
pyclustering.cluster.ema.ema.get_probabilities
def get_probabilities(self)
Returns 2-dimensional list with belong probability of each object from data to cluster correspondingl...
Definition: ema.py:560
pyclustering.cluster.ema.ema.__data
__data
Definition: ema.py:480
pyclustering.cluster.ema.ema_observer.__means_evolution
__means_evolution
Definition: ema.py:259
pyclustering.cluster.ema.ema_observer.__clusters_evolution
__clusters_evolution
Definition: ema.py:261
pyclustering.cluster.ema.ema.__extract_clusters
def __extract_clusters(self)
Definition: ema.py:605
pyclustering.cluster.ema.ema_visualizer
Visualizer of EM algorithm's results.
Definition: ema.py:320
pyclustering.utils
Utils that are used by modules of pyclustering.
Definition: __init__.py:1
pyclustering.cluster.ema.ema.__amount_clusters
__amount_clusters
Definition: ema.py:481
pyclustering.cluster.ema.ema_observer.__len__
def __len__(self)
Definition: ema.py:264
pyclustering.cluster.ema.ema_initializer.__initialize_kmeans
def __initialize_kmeans(self)
Definition: ema.py:195
pyclustering.cluster.ema.ema_initializer.__calculate_initial_covariances
def __calculate_initial_covariances(self, initial_clusters)
Definition: ema.py:161
pyclustering.cluster.ema.ema.__variances
__variances
Definition: ema.py:487
pyclustering.cluster.ema.ema_observer.get_iterations
def get_iterations(self)
Definition: ema.py:272
pyclustering.cluster.ema.ema.__erase_empty_clusters
def __erase_empty_clusters(self)
Definition: ema.py:582
pyclustering.cluster.ema.ema.__clusters
__clusters
Definition: ema.py:499
pyclustering.cluster.ema.ema.__rc
__rc
Definition: ema.py:497