kmeans.py
1 """!
2 
3 @brief The module contains K-Means algorithm and other related services.
4 @details Implementation based on paper @cite inproceedings::kmeans::1.
5 
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2020
8 @copyright GNU Public License
9 
10 @cond GNU_PUBLIC_LICENSE
11  PyClustering is free software: you can redistribute it and/or modify
12  it under the terms of the GNU General Public License as published by
13  the Free Software Foundation, either version 3 of the License, or
14  (at your option) any later version.
15 
16  PyClustering is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  GNU General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23 @endcond
24 
25 """
26 
27 
28 import numpy
29 import warnings
30 
31 try:
32  import matplotlib.pyplot as plt
33  import matplotlib.animation as animation
34 except Exception as error_instance:
35  warnings.warn("Impossible to import matplotlib (please, install 'matplotlib'), pyclustering's visualization "
36  "functionality is not available (details: '%s')." % str(error_instance))
37 
38 import pyclustering.core.kmeans_wrapper as wrapper
39 
40 from pyclustering.core.wrapper import ccore_library
41 from pyclustering.core.metric_wrapper import metric_wrapper
42 
43 from pyclustering.cluster.encoder import type_encoding
44 from pyclustering.cluster import cluster_visualizer
45 
46 from pyclustering.utils.metric import distance_metric, type_metric
47 
48 
50  """!
51  @brief Observer of K-Means algorithm that is used to collect information about clustering process on each iteration of the algorithm.
52 
53  @see kmeans
54 
55  """
56 
57  def __init__(self):
58  """!
59  @brief Initializer of observer of K-Means algorithm.
60 
61  """
62  self.__evolution_clusters = []
63  self.__evolution_centers = []
64  self.__initial_centers = []
65 
66 
67  def __len__(self):
68  """!
69  @brief Returns amount of steps that were observer during clustering process in K-Means algorithm.
70 
71  """
72  return len(self.__evolution_clusters)
73 
74 
75  def notify(self, clusters, centers):
76  """!
77  @brief This method is called by K-Means algorithm to notify about changes.
78 
79  @param[in] clusters (array_like): Allocated clusters by K-Means algorithm.
80  @param[in] centers (array_like): Allocated centers by K-Means algorithm.
81 
82  """
83  self.__evolution_clusters.append(clusters)
84  self.__evolution_centers.append(centers)
85 
86 
87  def set_evolution_centers(self, evolution_centers):
88  """!
89  @brief Set evolution of changes of centers during clustering process.
90 
91  @param[in] evolution_centers (array_like): Evolution of changes of centers during clustering process.
92 
93  """
94  self.__evolution_centers = evolution_centers
95 
96 
97  def get_centers(self, iteration):
98  """!
99  @brief Get method to return centers at specific iteration of clustering process.
100 
101  @param[in] iteration (uint): Clustering process iteration at which centers are required.
102 
103  @return (array_like) Centers at specific iteration.
104 
105  """
106  return self.__evolution_centers[iteration]
107 
108 
109  def set_evolution_clusters(self, evolution_clusters):
110  """!
111  @brief Set evolution of changes of centers during clustering process.
112 
113  @param[in] evolution_clusters (array_like): Evolution of changes of clusters during clustering process.
114 
115  """
116  self.__evolution_clusters = evolution_clusters
117 
118 
119  def get_clusters(self, iteration):
120  """!
121  @brief Get method to return allocated clusters at specific iteration of clustering process.
122 
123  @param[in] iteration (uint): Clustering process iteration at which clusters are required.
124 
125  @return (array_like) Clusters at specific iteration.
126 
127  """
128  return self.__evolution_clusters[iteration]
129 
130 
131 
133  """!
134  @brief Visualizer of K-Means algorithm's results.
135  @details K-Means visualizer provides visualization services that are specific for K-Means algorithm.
136 
137  """
138 
139  __default_2d_marker_size = 15
140  __default_3d_marker_size = 70
141 
142 
143  @staticmethod
144  def show_clusters(sample, clusters, centers, initial_centers = None, **kwargs):
145  """!
146  @brief Display K-Means clustering results.
147 
148  @param[in] sample (list): Dataset that was used for clustering.
149  @param[in] clusters (array_like): Clusters that were allocated by the algorithm.
150  @param[in] centers (array_like): Centers that were allocated by the algorithm.
151  @param[in] initial_centers (array_like): Initial centers that were used by the algorithm, if 'None' then initial centers are not displyed.
152  @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'figure', 'display', 'offset').
153 
154  <b>Keyword Args:</b><br>
155  - figure (figure): If 'None' then new is figure is created, otherwise specified figure is used for visualization.
156  - display (bool): If 'True' then figure will be shown by the method, otherwise it should be shown manually using matplotlib function 'plt.show()'.
157  - offset (uint): Specify axes index on the figure where results should be drawn (only if argument 'figure' is specified).
158 
159  @return (figure) Figure where clusters were drawn.
160 
161  """
162 
163  visualizer = cluster_visualizer()
164  visualizer.append_clusters(clusters, sample)
165 
166  offset = kwargs.get('offset', 0)
167  figure = kwargs.get('figure', None)
168  display = kwargs.get('display', True)
169 
170  if figure is None:
171  figure = visualizer.show(display=False)
172  else:
173  visualizer.show(figure=figure, display=False)
174 
175  kmeans_visualizer.__draw_centers(figure, offset, visualizer, centers, initial_centers)
176  kmeans_visualizer.__draw_rays(figure, offset, visualizer, sample, clusters, centers)
177 
178  if display is True:
179  plt.show()
180 
181  return figure
182 
183 
184  @staticmethod
185  def __draw_rays(figure, offset, visualizer, sample, clusters, centers):
186  ax = figure.get_axes()[offset]
187 
188  for index_cluster in range(len(clusters)):
189  color = visualizer.get_cluster_color(index_cluster, 0)
190  kmeans_visualizer.__draw_cluster_rays(ax, color, sample, clusters[index_cluster], centers[index_cluster])
191 
192 
193  @staticmethod
194  def __draw_cluster_rays(ax, color, sample, cluster, center):
195  dimension = len(sample[0])
196 
197  for index_point in cluster:
198  point = sample[index_point]
199  if dimension == 1:
200  ax.plot([point[0], center[0]], [0.0, 0.0], '-', color=color, linewidth=0.5)
201  elif dimension == 2:
202  ax.plot([point[0], center[0]], [point[1], center[1]], '-', color=color, linewidth=0.5)
203  elif dimension == 3:
204  ax.plot([point[0], center[0]], [point[1], center[1]], [point[2], center[2]], '-', color=color, linewidth=0.5)
205 
206 
207  @staticmethod
208  def __draw_center(ax, center, color, marker, alpha):
209  dimension = len(center)
210 
211  if dimension == 1:
212  ax.plot(center[0], 0.0, color=color, alpha=alpha, marker=marker, markersize=kmeans_visualizer.__default_2d_marker_size)
213  elif dimension == 2:
214  ax.plot(center[0], center[1], color=color, alpha=alpha, marker=marker, markersize=kmeans_visualizer.__default_2d_marker_size)
215  elif dimension == 3:
216  ax.scatter(center[0], center[1], center[2], c=color, alpha=alpha, marker=marker, s=kmeans_visualizer.__default_3d_marker_size)
217 
218 
219  @staticmethod
220  def __draw_centers(figure, offset, visualizer, centers, initial_centers):
221  ax = figure.get_axes()[offset]
222 
223  for index_center in range(len(centers)):
224  color = visualizer.get_cluster_color(index_center, 0)
225  kmeans_visualizer.__draw_center(ax, centers[index_center], color, '*', 1.0)
226 
227  if initial_centers is not None:
228  kmeans_visualizer.__draw_center(ax, initial_centers[index_center], color, '*', 0.4)
229 
230 
231  @staticmethod
232  def animate_cluster_allocation(data, observer, animation_velocity=500, movie_fps=1, save_movie=None):
233  """!
234  @brief Animates clustering process that is performed by K-Means algorithm.
235 
236  @param[in] data (list): Dataset that is used for clustering.
237  @param[in] observer (kmeans_observer): EM observer that was used for collection information about clustering process.
238  @param[in] animation_velocity (uint): Interval between frames in milliseconds (for run-time animation only).
239  @param[in] movie_fps (uint): Defines frames per second (for rendering movie only).
240  @param[in] save_movie (string): If it is specified then animation will be stored to file that is specified in this parameter.
241 
242  """
243  figure = plt.figure()
244 
245  def init_frame():
246  return frame_generation(0)
247 
248  def frame_generation(index_iteration):
249  figure.clf()
250 
251  figure.suptitle("K-Means algorithm (iteration: " + str(index_iteration) + ")", fontsize=18, fontweight='bold')
252 
253  clusters = observer.get_clusters(index_iteration)
254  centers = observer.get_centers(index_iteration)
255  kmeans_visualizer.show_clusters(data, clusters, centers, None, figure=figure, display=False)
256 
257  figure.subplots_adjust(top=0.85)
258 
259  return [figure.gca()]
260 
261  iterations = len(observer)
262  cluster_animation = animation.FuncAnimation(figure, frame_generation, iterations, interval=animation_velocity,
263  init_func=init_frame, repeat_delay=5000)
264 
265  if save_movie is not None:
266  cluster_animation.save(save_movie, writer='ffmpeg', fps=movie_fps, bitrate=3000)
267  else:
268  plt.show()
269 
270 
271 
272 class kmeans:
273  """!
274  @brief Class implements K-Means clustering algorithm.
275  @details K-Means clustering aims to partition n observations into k clusters in which each observation belongs to
276  the cluster with the nearest mean, serving as a prototype of the cluster. This results in a partitioning
277  of the data space into Voronoi cells.
278 
279  K-Means clustering results depend on initial centers. Algorithm K-Means++ can used for initialization of
280  initial centers - see module 'pyclustering.cluster.center_initializer'.
281 
282  CCORE implementation (C/C++ part of the library) of the algorithm performs parallel processing to ensure maximum
283  performance.
284 
285  Implementation based on the paper @cite inproceedings::kmeans::1.
286 
287  @image html kmeans_example_clustering.png "Fig. 1. K-Means clustering results. At the left - 'Simple03.data' sample, at the right - 'Lsun.data' sample."
288 
289  Example #1 - Clustering using K-Means++ for center initialization:
290  @code
291  from pyclustering.cluster.kmeans import kmeans, kmeans_visualizer
292  from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
293  from pyclustering.samples.definitions import FCPS_SAMPLES
294  from pyclustering.utils import read_sample
295 
296  # Load list of points for cluster analysis.
297  sample = read_sample(FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS)
298 
299  # Prepare initial centers using K-Means++ method.
300  initial_centers = kmeans_plusplus_initializer(sample, 2).initialize()
301 
302  # Create instance of K-Means algorithm with prepared centers.
303  kmeans_instance = kmeans(sample, initial_centers)
304 
305  # Run cluster analysis and obtain results.
306  kmeans_instance.process()
307  clusters = kmeans_instance.get_clusters()
308  final_centers = kmeans_instance.get_centers()
309 
310  # Visualize obtained results
311  kmeans_visualizer.show_clusters(sample, clusters, final_centers)
312  @endcode
313 
314  Example #2 - Clustering using specific distance metric, for example, Manhattan distance:
315  @code
316  # prepare input data and initial centers for cluster analysis using K-Means
317 
318  # create metric that will be used for clustering
319  manhattan_metric = distance_metric(type_metric.MANHATTAN)
320 
321  # create instance of K-Means using specific distance metric:
322  kmeans_instance = kmeans(sample, initial_centers, metric=manhattan_metric)
323 
324  # run cluster analysis and obtain results
325  kmeans_instance.process()
326  clusters = kmeans_instance.get_clusters()
327  @endcode
328 
329  @see center_initializer
330 
331  """
332 
333  def __init__(self, data, initial_centers, tolerance=0.001, ccore=True, **kwargs):
334  """!
335  @brief Constructor of clustering algorithm K-Means.
336  @details Center initializer can be used for creating initial centers, for example, K-Means++ method.
337 
338  @param[in] data (array_like): Input data that is presented as array of points (objects), each point should be represented by array_like data structure.
339  @param[in] initial_centers (array_like): Initial coordinates of centers of clusters that are represented by array_like data structure: [center1, center2, ...].
340  @param[in] tolerance (double): Stop condition: if maximum value of change of centers of clusters is less than tolerance then algorithm stops processing.
341  @param[in] ccore (bool): Defines should be CCORE library (C++ pyclustering library) used instead of Python code or not.
342  @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'observer', 'metric', 'itermax').
343 
344  <b>Keyword Args:</b><br>
345  - observer (kmeans_observer): Observer of the algorithm to collect information about clustering process on each iteration.
346  - metric (distance_metric): Metric that is used for distance calculation between two points (by default euclidean square distance).
347  - itermax (uint): Maximum number of iterations that is used for clustering process (by default: 200).
348 
349  @see center_initializer
350 
351  """
352  self.__pointer_data = numpy.array(data)
353  self.__clusters = []
354  self.__centers = numpy.array(initial_centers)
355  self.__tolerance = tolerance
356  self.__total_wce = 0.0
357 
358  self.__observer = kwargs.get('observer', None)
359  self.__metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN_SQUARE))
360  self.__itermax = kwargs.get('itermax', 100)
361 
362  if self.__metric.get_type() != type_metric.USER_DEFINED:
363  self.__metric.enable_numpy_usage()
364  else:
365  self.__metric.disable_numpy_usage()
366 
367  self.__ccore = ccore and self.__metric.get_type() != type_metric.USER_DEFINED
368  if self.__ccore is True:
369  self.__ccore = ccore_library.workable()
370 
371  self.__verify_arguments()
372 
373 
374  def process(self):
375  """!
376  @brief Performs cluster analysis in line with rules of K-Means algorithm.
377 
378  @return (kmeans) Returns itself (K-Means instance).
379 
380  @see get_clusters()
381  @see get_centers()
382 
383  """
384 
385  if len(self.__pointer_data[0]) != len(self.__centers[0]):
386  raise ValueError("Dimension of the input data and dimension of the initial cluster centers must be equal.")
387 
388  if self.__ccore is True:
389  self.__process_by_ccore()
390  else:
391  self.__process_by_python()
392 
393  return self
394 
395 
396  def __process_by_ccore(self):
397  """!
398  @brief Performs cluster analysis using CCORE (C/C++ part of pyclustering library).
399 
400  """
401  ccore_metric = metric_wrapper.create_instance(self.__metric)
402 
403  results = wrapper.kmeans(self.__pointer_data, self.__centers, self.__tolerance, self.__itermax, (self.__observer is not None), ccore_metric.get_pointer())
404  self.__clusters = results[0]
405  self.__centers = results[1]
406 
407  if self.__observer is not None:
408  self.__observer.set_evolution_clusters(results[2])
409  self.__observer.set_evolution_centers(results[3])
410 
411  self.__total_wce = results[4][0]
412 
413 
414  def __process_by_python(self):
415  """!
416  @brief Performs cluster analysis using python code.
417 
418  """
419 
420  maximum_change = float('inf')
421  iteration = 0
422 
423  if self.__observer is not None:
424  initial_clusters = self.__update_clusters()
425  self.__observer.notify(initial_clusters, self.__centers.tolist())
426 
427  while maximum_change > self.__tolerance and iteration < self.__itermax:
428  self.__clusters = self.__update_clusters()
429  updated_centers = self.__update_centers() # changes should be calculated before assignment
430 
431  if self.__observer is not None:
432  self.__observer.notify(self.__clusters, updated_centers.tolist())
433 
434  maximum_change = self.__calculate_changes(updated_centers)
435 
436  self.__centers = updated_centers # assign center after change calculation
437  iteration += 1
438 
439  self.__calculate_total_wce()
440 
441 
442  def predict(self, points):
443  """!
444  @brief Calculates the closest cluster to each point.
445 
446  @param[in] points (array_like): Points for which closest clusters are calculated.
447 
448  @return (list) List of closest clusters for each point. Each cluster is denoted by index. Return empty
449  collection if 'process()' method was not called.
450 
451  """
452 
453  nppoints = numpy.array(points)
454  if len(self.__clusters) == 0:
455  return []
456 
457  differences = numpy.zeros((len(nppoints), len(self.__centers)))
458  for index_point in range(len(nppoints)):
459  if self.__metric.get_type() != type_metric.USER_DEFINED:
460  differences[index_point] = self.__metric(nppoints[index_point], self.__centers)
461  else:
462  differences[index_point] = [ self.__metric(nppoints[index_point], center) for center in self.__centers ]
463 
464  return numpy.argmin(differences, axis=1)
465 
466 
467  def get_clusters(self):
468  """!
469  @brief Returns list of allocated clusters, each cluster contains indexes of objects in list of data.
470 
471  @see process()
472  @see get_centers()
473 
474  """
475 
476  return self.__clusters
477 
478 
479  def get_centers(self):
480  """!
481  @brief Returns list of centers of allocated clusters.
482 
483  @see process()
484  @see get_clusters()
485 
486  """
487 
488  if isinstance(self.__centers, list):
489  return self.__centers
490 
491  return self.__centers.tolist()
492 
493 
494  def get_total_wce(self):
495  """!
496  @brief Returns sum of metric errors that depends on metric that was used for clustering (by default SSE - Sum of Squared Errors).
497  @details Sum of metric errors is calculated using distance between point and its center:
498  \f[error=\sum_{i=0}^{N}distance(x_{i}-center(x_{i}))\f]
499 
500  @see process()
501  @see get_clusters()
502 
503  """
504 
505  return self.__total_wce
506 
507 
509  """!
510  @brief Returns clustering result representation type that indicate how clusters are encoded.
511 
512  @return (type_encoding) Clustering result representation.
513 
514  @see get_clusters()
515 
516  """
517 
518  return type_encoding.CLUSTER_INDEX_LIST_SEPARATION
519 
520 
521  def __update_clusters(self):
522  """!
523  @brief Calculate distance (in line with specified metric) to each point from the each cluster. Nearest points
524  are captured by according clusters and as a result clusters are updated.
525 
526  @return (list) Updated clusters as list of clusters. Each cluster contains indexes of objects from data.
527 
528  """
529 
530  clusters = [[] for _ in range(len(self.__centers))]
531 
532  dataset_differences = self.__calculate_dataset_difference(len(clusters))
533 
534  optimum_indexes = numpy.argmin(dataset_differences, axis=0)
535  for index_point in range(len(optimum_indexes)):
536  index_cluster = optimum_indexes[index_point]
537  clusters[index_cluster].append(index_point)
538 
539  clusters = [cluster for cluster in clusters if len(cluster) > 0]
540 
541  return clusters
542 
543 
544  def __update_centers(self):
545  """!
546  @brief Calculate centers of clusters in line with contained objects.
547 
548  @return (numpy.array) Updated centers.
549 
550  """
551 
552  dimension = self.__pointer_data.shape[1]
553  centers = numpy.zeros((len(self.__clusters), dimension))
554 
555  for index in range(len(self.__clusters)):
556  cluster_points = self.__pointer_data[self.__clusters[index], :]
557  centers[index] = cluster_points.mean(axis=0)
558 
559  return numpy.array(centers)
560 
561 
562  def __calculate_total_wce(self):
563  """!
564  @brief Calculate total within cluster errors that is depend on metric that was chosen for K-Means algorithm.
565 
566  """
567 
568  dataset_differences = self.__calculate_dataset_difference(len(self.__clusters))
569 
570  self.__total_wce = 0
571  for index_cluster in range(len(self.__clusters)):
572  for index_point in self.__clusters[index_cluster]:
573  self.__total_wce += dataset_differences[index_cluster][index_point]
574 
575 
576  def __calculate_dataset_difference(self, amount_clusters):
577  """!
578  @brief Calculate distance from each point to each cluster center.
579 
580  """
581  dataset_differences = numpy.zeros((amount_clusters, len(self.__pointer_data)))
582  for index_center in range(amount_clusters):
583  if self.__metric.get_type() != type_metric.USER_DEFINED:
584  dataset_differences[index_center] = self.__metric(self.__pointer_data, self.__centers[index_center])
585  else:
586  dataset_differences[index_center] = [ self.__metric(point, self.__centers[index_center])
587  for point in self.__pointer_data ]
588 
589  return dataset_differences
590 
591 
592  def __calculate_changes(self, updated_centers):
593  """!
594  @brief Calculates changes estimation between previous and current iteration using centers for that purpose.
595 
596  @param[in] updated_centers (array_like): New cluster centers.
597 
598  @return (float) Maximum changes between centers.
599 
600  """
601  if len(self.__centers) != len(updated_centers):
602  maximum_change = float('inf')
603 
604  else:
605  if self.__metric.get_type() != type_metric.USER_DEFINED:
606  changes = self.__metric(self.__centers, updated_centers)
607  else:
608  changes = [self.__metric(center, updated_center) for center, updated_center in zip(self.__centers, updated_centers)]
609 
610  maximum_change = numpy.max(changes)
611 
612  return maximum_change
613 
614 
615  def __verify_arguments(self):
616  """!
617  @brief Verify input parameters for the algorithm and throw exception in case of incorrectness.
618 
619  """
620  if len(self.__pointer_data) == 0:
621  raise ValueError("Input data is empty (size: '%d')." % len(self.__pointer_data))
622 
623  if len(self.__centers) == 0:
624  raise ValueError("Initial centers are empty (size: '%d')." % len(self.__pointer_data))
625 
626  if self.__tolerance < 0:
627  raise ValueError("Tolerance (current value: '%d') should be greater or equal to 0." %
628  self.__tolerance)
629 
630  if self.__itermax < 0:
631  raise ValueError("Maximum iterations (current value: '%d') should be greater or equal to 0." %
632  self.__tolerance)
Common visualizer of clusters on 1D, 2D or 3D surface.
Definition: __init__.py:390
pyclustering module for cluster analysis.
Definition: __init__.py:1
def notify(self, clusters, centers)
This method is called by K-Means algorithm to notify about changes.
Definition: kmeans.py:75
def get_centers(self)
Returns list of centers of allocated clusters.
Definition: kmeans.py:479
def __calculate_dataset_difference(self, amount_clusters)
Calculate distance from each point to each cluster center.
Definition: kmeans.py:576
def __verify_arguments(self)
Verify input parameters for the algorithm and throw exception in case of incorrectness.
Definition: kmeans.py:615
def process(self)
Performs cluster analysis in line with rules of K-Means algorithm.
Definition: kmeans.py:374
Module provides various distance metrics - abstraction of the notion of distance in a metric space...
Definition: metric.py:1
def get_clusters(self, iteration)
Get method to return allocated clusters at specific iteration of clustering process.
Definition: kmeans.py:119
def __calculate_changes(self, updated_centers)
Calculates changes estimation between previous and current iteration using centers for that purpose...
Definition: kmeans.py:592
Module for representing clustering results.
Definition: encoder.py:1
Distance metric performs distance calculation between two points in line with encapsulated function...
Definition: metric.py:67
Observer of K-Means algorithm that is used to collect information about clustering process on each it...
Definition: kmeans.py:49
def __update_centers(self)
Calculate centers of clusters in line with contained objects.
Definition: kmeans.py:544
def get_total_wce(self)
Returns sum of metric errors that depends on metric that was used for clustering (by default SSE - Su...
Definition: kmeans.py:494
Class implements K-Means clustering algorithm.
Definition: kmeans.py:272
def __init__(self)
Initializer of observer of K-Means algorithm.
Definition: kmeans.py:57
Visualizer of K-Means algorithm&#39;s results.
Definition: kmeans.py:132
def get_centers(self, iteration)
Get method to return centers at specific iteration of clustering process.
Definition: kmeans.py:97
def __calculate_total_wce(self)
Calculate total within cluster errors that is depend on metric that was chosen for K-Means algorithm...
Definition: kmeans.py:562
def set_evolution_centers(self, evolution_centers)
Set evolution of changes of centers during clustering process.
Definition: kmeans.py:87
def __init__(self, data, initial_centers, tolerance=0.001, ccore=True, kwargs)
Constructor of clustering algorithm K-Means.
Definition: kmeans.py:333
def __len__(self)
Returns amount of steps that were observer during clustering process in K-Means algorithm.
Definition: kmeans.py:67
def set_evolution_clusters(self, evolution_clusters)
Set evolution of changes of centers during clustering process.
Definition: kmeans.py:109
def show_clusters(sample, clusters, centers, initial_centers=None, kwargs)
Display K-Means clustering results.
Definition: kmeans.py:144
def get_clusters(self)
Returns list of allocated clusters, each cluster contains indexes of objects in list of data...
Definition: kmeans.py:467
def __process_by_ccore(self)
Performs cluster analysis using CCORE (C/C++ part of pyclustering library).
Definition: kmeans.py:396
def get_cluster_encoding(self)
Returns clustering result representation type that indicate how clusters are encoded.
Definition: kmeans.py:508
def __update_clusters(self)
Calculate distance (in line with specified metric) to each point from the each cluster.
Definition: kmeans.py:521
def __process_by_python(self)
Performs cluster analysis using python code.
Definition: kmeans.py:414
def animate_cluster_allocation(data, observer, animation_velocity=500, movie_fps=1, save_movie=None)
Animates clustering process that is performed by K-Means algorithm.
Definition: kmeans.py:232
def predict(self, points)
Calculates the closest cluster to each point.
Definition: kmeans.py:442