pyclustering  0.10.1
pyclustering is a Python, C++ data mining library.
kmeans.py
1 """!
2 
3 @brief The module contains K-Means algorithm and other related services.
4 @details Implementation based on paper @cite inproceedings::kmeans::1.
5 
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2020
8 @copyright BSD-3-Clause
9 
10 """
11 
12 
13 import copy
14 import numpy
15 
16 import matplotlib.pyplot as plt
17 import matplotlib.animation as animation
18 
19 import pyclustering.core.kmeans_wrapper as wrapper
20 
21 from pyclustering.core.wrapper import ccore_library
22 from pyclustering.core.metric_wrapper import metric_wrapper
23 
24 from pyclustering.cluster.encoder import type_encoding
25 from pyclustering.cluster import cluster_visualizer
26 
27 from pyclustering.utils.metric import distance_metric, type_metric
28 
29 
class kmeans_observer:
    """!
    @brief Observer of K-Means algorithm that is used to collect information about clustering process on each iteration of the algorithm.
    @details The observer stores two parallel histories: allocated clusters and cluster centers. The histories are
              either appended step by step via 'notify()' or assigned as a whole via 'set_evolution_clusters()' and
              'set_evolution_centers()'.

    @see kmeans

    """

    def __init__(self):
        """!
        @brief Initializer of observer of K-Means algorithm.

        """
        self.__evolution_clusters = []
        self.__evolution_centers = []
        self.__initial_centers = []


    def __len__(self):
        """!
        @brief Returns amount of steps that were observed during clustering process in K-Means algorithm.
        @details The amount is the length of the stored cluster history.

        @return (uint) Amount of observed steps.

        """
        return len(self.__evolution_clusters)


    def notify(self, clusters, centers):
        """!
        @brief This method is called by K-Means algorithm to notify about changes.

        @param[in] clusters (array_like): Allocated clusters by K-Means algorithm.
        @param[in] centers (array_like): Allocated centers by K-Means algorithm.

        """
        self.__evolution_clusters.append(clusters)
        self.__evolution_centers.append(centers)


    def set_evolution_centers(self, evolution_centers):
        """!
        @brief Set evolution of changes of centers during clustering process.
        @details The previously stored history of centers is replaced by the specified one.

        @param[in] evolution_centers (array_like): Evolution of changes of centers during clustering process.

        """
        self.__evolution_centers = evolution_centers


    def get_centers(self, iteration):
        """!
        @brief Get method to return centers at specific iteration of clustering process.

        @param[in] iteration (uint): Clustering process iteration at which centers are required.

        @return (array_like) Centers at specific iteration.

        """
        return self.__evolution_centers[iteration]


    def set_evolution_clusters(self, evolution_clusters):
        """!
        @brief Set evolution of changes of clusters during clustering process.
        @details The previously stored history of clusters is replaced by the specified one.

        @param[in] evolution_clusters (array_like): Evolution of changes of clusters during clustering process.

        """
        self.__evolution_clusters = evolution_clusters


    def get_clusters(self, iteration):
        """!
        @brief Get method to return allocated clusters at specific iteration of clustering process.

        @param[in] iteration (uint): Clustering process iteration at which clusters are required.

        @return (array_like) Clusters at specific iteration.

        """
        return self.__evolution_clusters[iteration]
110 
111 
112 
class kmeans_visualizer:
    """!
    @brief Visualizer of K-Means algorithm's results.
    @details K-Means visualizer provides visualization services that are specific for K-Means algorithm.

    """

    # Marker sizes for centers: 'markersize' of 'plot' (1D and 2D data) and 's' of 'scatter' (3D data).
    __default_2d_marker_size = 15
    __default_3d_marker_size = 70


    @staticmethod
    def show_clusters(sample, clusters, centers, initial_centers=None, **kwargs):
        """!
        @brief Display K-Means clustering results.

        @param[in] sample (list): Dataset that was used for clustering.
        @param[in] clusters (array_like): Clusters that were allocated by the algorithm.
        @param[in] centers (array_like): Centers that were allocated by the algorithm.
        @param[in] initial_centers (array_like): Initial centers that were used by the algorithm, if 'None' then initial centers are not displayed.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'figure', 'display', 'offset').

        <b>Keyword Args:</b><br>
            - figure (figure): If 'None' then new figure is created, otherwise specified figure is used for visualization.
            - display (bool): If 'True' then figure will be shown by the method, otherwise it should be shown manually using matplotlib function 'plt.show()'.
            - offset (uint): Specify axes index on the figure where results should be drawn (only if argument 'figure' is specified).

        @return (figure) Figure where clusters were drawn.

        """

        visualizer = cluster_visualizer()
        visualizer.append_clusters(clusters, sample)

        offset = kwargs.get('offset', 0)
        figure = kwargs.get('figure', None)
        display = kwargs.get('display', True)

        # Clusters are always rendered without blocking; centers and rays are added before the figure is shown.
        if figure is None:
            figure = visualizer.show(display=False)
        else:
            visualizer.show(figure=figure, display=False)

        kmeans_visualizer.__draw_centers(figure, offset, visualizer, centers, initial_centers)
        kmeans_visualizer.__draw_rays(figure, offset, visualizer, sample, clusters, centers)

        if display is True:
            plt.show()

        return figure


    @staticmethod
    def __draw_rays(figure, offset, visualizer, sample, clusters, centers):
        """!
        @brief Draw lines from each point to the center of its cluster on the axes with index 'offset'.

        @param[in] figure (figure): Figure whose axes are used for drawing.
        @param[in] offset (uint): Index of axes on the figure.
        @param[in] visualizer (cluster_visualizer): Visualizer that provides color of each cluster.
        @param[in] sample (list): Dataset that was used for clustering.
        @param[in] clusters (array_like): Clusters as collections of point indexes.
        @param[in] centers (array_like): Centers, where center with index 'i' corresponds to cluster with index 'i'.

        """
        ax = figure.get_axes()[offset]

        for index_cluster in range(len(clusters)):
            color = visualizer.get_cluster_color(index_cluster, 0)
            kmeans_visualizer.__draw_cluster_rays(ax, color, sample, clusters[index_cluster], centers[index_cluster])


    @staticmethod
    def __draw_cluster_rays(ax, color, sample, cluster, center):
        """!
        @brief Draw lines from each point of a single cluster to the cluster center.
        @details Only 1D, 2D and 3D data is drawn, for other dimensions nothing is drawn. Dimension is defined by
                  the first point of the sample.

        @param[in] ax (Axes): Axes where lines are drawn.
        @param[in] color (string): Color of lines.
        @param[in] sample (list): Dataset that was used for clustering.
        @param[in] cluster (array_like): Indexes of points that belong to the cluster.
        @param[in] center (array_like): Coordinates of the cluster center.

        """
        dimension = len(sample[0])

        for index_point in cluster:
            point = sample[index_point]
            if dimension == 1:
                # One-dimensional data is drawn along the horizontal axis at y = 0.
                ax.plot([point[0], center[0]], [0.0, 0.0], '-', color=color, linewidth=0.5)
            elif dimension == 2:
                ax.plot([point[0], center[0]], [point[1], center[1]], '-', color=color, linewidth=0.5)
            elif dimension == 3:
                ax.plot([point[0], center[0]], [point[1], center[1]], [point[2], center[2]], '-', color=color, linewidth=0.5)


    @staticmethod
    def __draw_center(ax, center, color, marker, alpha):
        """!
        @brief Draw a single center as a marker.
        @details Only 1D, 2D and 3D centers are drawn, for other dimensions nothing is drawn.

        @param[in] ax (Axes): Axes where the center is drawn.
        @param[in] center (array_like): Coordinates of the center.
        @param[in] color (string): Color of the marker.
        @param[in] marker (string): Marker type.
        @param[in] alpha (double): Transparency of the marker.

        """
        dimension = len(center)

        if dimension == 1:
            ax.plot(center[0], 0.0, color=color, alpha=alpha, marker=marker, markersize=kmeans_visualizer.__default_2d_marker_size)
        elif dimension == 2:
            ax.plot(center[0], center[1], color=color, alpha=alpha, marker=marker, markersize=kmeans_visualizer.__default_2d_marker_size)
        elif dimension == 3:
            ax.scatter(center[0], center[1], center[2], c=color, alpha=alpha, marker=marker, s=kmeans_visualizer.__default_3d_marker_size)


    @staticmethod
    def __draw_centers(figure, offset, visualizer, centers, initial_centers):
        """!
        @brief Draw final centers and, if they are specified, initial centers on the axes with index 'offset'.
        @details Final centers are drawn opaque, initial centers are drawn with the same color and marker but
                  semi-transparent (alpha 0.4).

        @param[in] figure (figure): Figure whose axes are used for drawing.
        @param[in] offset (uint): Index of axes on the figure.
        @param[in] visualizer (cluster_visualizer): Visualizer that provides color of each cluster.
        @param[in] centers (array_like): Centers that were allocated by the algorithm.
        @param[in] initial_centers (array_like): Initial centers, if 'None' then they are not drawn.

        """
        ax = figure.get_axes()[offset]

        for index_center in range(len(centers)):
            color = visualizer.get_cluster_color(index_center, 0)
            kmeans_visualizer.__draw_center(ax, centers[index_center], color, '*', 1.0)

            if initial_centers is not None:
                kmeans_visualizer.__draw_center(ax, initial_centers[index_center], color, '*', 0.4)


    @staticmethod
    def animate_cluster_allocation(data, observer, animation_velocity=500, movie_fps=1, save_movie=None):
        """!
        @brief Animates clustering process that is performed by K-Means algorithm.

        @param[in] data (list): Dataset that is used for clustering.
        @param[in] observer (kmeans_observer): K-Means observer that was used for collection information about clustering process.
        @param[in] animation_velocity (uint): Interval between frames in milliseconds (for run-time animation only).
        @param[in] movie_fps (uint): Defines frames per second (for rendering movie only).
        @param[in] save_movie (string): If it is specified then animation will be stored to file that is specified in this parameter.

        """
        figure = plt.figure()

        def init_frame():
            return frame_generation(0)

        def frame_generation(index_iteration):
            # Every frame is rendered from scratch: the figure is cleared and clusters are drawn again.
            figure.clf()

            figure.suptitle("K-Means algorithm (iteration: " + str(index_iteration) + ")", fontsize=18, fontweight='bold')

            clusters = observer.get_clusters(index_iteration)
            centers = observer.get_centers(index_iteration)
            kmeans_visualizer.show_clusters(data, clusters, centers, None, figure=figure, display=False)

            figure.subplots_adjust(top=0.85)

            return [figure.gca()]

        iterations = len(observer)
        cluster_animation = animation.FuncAnimation(figure, frame_generation, iterations, interval=animation_velocity,
                                                    init_func=init_frame, repeat_delay=5000)

        if save_movie is not None:
            cluster_animation.save(save_movie, writer='ffmpeg', fps=movie_fps, bitrate=3000)
        else:
            plt.show()
250 
251 
252 
class kmeans:
    """!
    @brief Class implements K-Means clustering algorithm.
    @details K-Means clustering aims to partition n observations into k clusters in which each observation belongs to
              the cluster with the nearest mean, serving as a prototype of the cluster. This results in a partitioning
              of the data space into Voronoi cells.

    K-Means clustering results depend on initial centers. Algorithm K-Means++ can be used for initialization of
    initial centers - see module 'pyclustering.cluster.center_initializer'.

    CCORE implementation (C/C++ part of the library) of the algorithm performs parallel processing to ensure maximum
    performance.

    Implementation based on the paper @cite inproceedings::kmeans::1.

    @image html kmeans_example_clustering.png "Fig. 1. K-Means clustering results. At the left - 'Simple03.data' sample, at the right - 'Lsun.data' sample."

    Example #1 - Clustering using K-Means++ for center initialization:
    @code
        from pyclustering.cluster.kmeans import kmeans, kmeans_visualizer
        from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
        from pyclustering.samples.definitions import FCPS_SAMPLES
        from pyclustering.utils import read_sample

        # Load list of points for cluster analysis.
        sample = read_sample(FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS)

        # Prepare initial centers using K-Means++ method.
        initial_centers = kmeans_plusplus_initializer(sample, 2).initialize()

        # Create instance of K-Means algorithm with prepared centers.
        kmeans_instance = kmeans(sample, initial_centers)

        # Run cluster analysis and obtain results.
        kmeans_instance.process()
        clusters = kmeans_instance.get_clusters()
        final_centers = kmeans_instance.get_centers()

        # Visualize obtained results
        kmeans_visualizer.show_clusters(sample, clusters, final_centers)
    @endcode

    Example #2 - Clustering using specific distance metric, for example, Manhattan distance:
    @code
        # prepare input data and initial centers for cluster analysis using K-Means

        # create metric that will be used for clustering
        manhattan_metric = distance_metric(type_metric.MANHATTAN)

        # create instance of K-Means using specific distance metric:
        kmeans_instance = kmeans(sample, initial_centers, metric=manhattan_metric)

        # run cluster analysis and obtain results
        kmeans_instance.process()
        clusters = kmeans_instance.get_clusters()
    @endcode

    @see center_initializer

    """

    def __init__(self, data, initial_centers, tolerance=0.001, ccore=True, **kwargs):
        """!
        @brief Constructor of clustering algorithm K-Means.
        @details Center initializer can be used for creating initial centers, for example, K-Means++ method.

        @param[in] data (array_like): Input data that is presented as array of points (objects), each point should be represented by array_like data structure.
        @param[in] initial_centers (array_like): Initial coordinates of centers of clusters that are represented by array_like data structure: [center1, center2, ...].
        @param[in] tolerance (double): Stop condition: if maximum value of change of centers of clusters is less than tolerance then algorithm stops processing.
        @param[in] ccore (bool): Defines should be CCORE library (C++ pyclustering library) used instead of Python code or not.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'observer', 'metric', 'itermax').

        <b>Keyword Args:</b><br>
            - observer (kmeans_observer): Observer of the algorithm to collect information about clustering process on each iteration.
            - metric (distance_metric): Metric that is used for distance calculation between two points (by default euclidean square distance).
            - itermax (uint): Maximum number of iterations that is used for clustering process (by default: 100).

        @exception ValueError Input data or initial centers are empty, or tolerance or 'itermax' is negative.

        @see center_initializer

        """
        self.__pointer_data = numpy.array(data)
        self.__clusters = []
        self.__centers = numpy.array(initial_centers)
        self.__tolerance = tolerance
        self.__total_wce = 0.0

        self.__observer = kwargs.get('observer', None)
        # The metric is copied because its numpy mode is switched below and the caller's object must stay untouched.
        self.__metric = copy.copy(kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN_SQUARE)))
        self.__itermax = kwargs.get('itermax', 100)

        if self.__metric.get_type() != type_metric.USER_DEFINED:
            self.__metric.enable_numpy_usage()
        else:
            self.__metric.disable_numpy_usage()

        # User-defined metric is always processed by python code; otherwise CCORE is used only if it is workable.
        self.__ccore = ccore and self.__metric.get_type() != type_metric.USER_DEFINED
        if self.__ccore is True:
            self.__ccore = ccore_library.workable()

        self.__verify_arguments()


    def process(self):
        """!
        @brief Performs cluster analysis in line with rules of K-Means algorithm.

        @return (kmeans) Returns itself (K-Means instance).

        @exception ValueError Dimension of the first point differs from dimension of the first initial center.

        @see get_clusters()
        @see get_centers()

        """

        if len(self.__pointer_data[0]) != len(self.__centers[0]):
            raise ValueError("Dimension of the input data and dimension of the initial cluster centers must be equal.")

        if self.__ccore is True:
            self.__process_by_ccore()
        else:
            self.__process_by_python()

        return self


    def __process_by_ccore(self):
        """!
        @brief Performs cluster analysis using CCORE (C/C++ part of pyclustering library).
        @details Results are unpacked by position: clusters, centers, evolution of clusters, evolution of centers
                  and total within-cluster error. Evolution is passed to the observer only if it is specified.

        """
        ccore_metric = metric_wrapper.create_instance(self.__metric)

        results = wrapper.kmeans(self.__pointer_data, self.__centers, self.__tolerance, self.__itermax,
                                 (self.__observer is not None), ccore_metric.get_pointer())

        self.__clusters = results[0]
        self.__centers = results[1]

        if self.__observer is not None:
            self.__observer.set_evolution_clusters(results[2])
            self.__observer.set_evolution_centers(results[3])

        self.__total_wce = results[4][0]


    def __process_by_python(self):
        """!
        @brief Performs cluster analysis using python code.
        @details Clusters and centers are updated until maximum change of centers is not greater than tolerance or
                  maximum amount of iterations is reached. Total within-cluster error is calculated at the end.

        """

        maximum_change = float('inf')
        iteration = 0

        if self.__observer is not None:
            # Initial state (before the first iteration) is reported to the observer as well.
            initial_clusters = self.__update_clusters()
            self.__observer.notify(initial_clusters, self.__centers.tolist())

        while maximum_change > self.__tolerance and iteration < self.__itermax:
            self.__clusters = self.__update_clusters()
            updated_centers = self.__update_centers()  # changes should be calculated before assignment

            if self.__observer is not None:
                self.__observer.notify(self.__clusters, updated_centers.tolist())

            maximum_change = self.__calculate_changes(updated_centers)

            self.__centers = updated_centers    # assign center after change calculation
            iteration += 1

        self.__calculate_total_wce()


    def predict(self, points):
        """!
        @brief Calculates the closest cluster to each point.

        @param[in] points (array_like): Points for which closest clusters are calculated.

        @return (array_like) Indexes of the closest clusters for each point (numpy array). Empty list is returned
                 if there are no allocated clusters, for example, if 'process()' method was not called.

        """

        nppoints = numpy.array(points)
        if len(self.__clusters) == 0:
            return []

        differences = numpy.zeros((len(nppoints), len(self.__centers)))
        for index_point in range(len(nppoints)):
            if self.__metric.get_type() != type_metric.USER_DEFINED:
                # Non user-defined metric is in numpy mode: distances to all centers are obtained by a single call.
                differences[index_point] = self.__metric(nppoints[index_point], self.__centers)
            else:
                differences[index_point] = [self.__metric(nppoints[index_point], center) for center in self.__centers]

        return numpy.argmin(differences, axis=1)


    def get_clusters(self):
        """!
        @brief Returns list of allocated clusters, each cluster contains indexes of objects in list of data.

        @return (list) Allocated clusters, empty list if there are no allocated clusters yet.

        @see process()
        @see get_centers()

        """

        return self.__clusters


    def get_centers(self):
        """!
        @brief Returns list of centers of allocated clusters.

        @return (list) Centers of clusters; centers that are stored as numpy array are converted to list.

        @see process()
        @see get_clusters()

        """

        if isinstance(self.__centers, list):
            return self.__centers

        return self.__centers.tolist()


    def get_total_wce(self):
        r"""!
        @brief Returns sum of metric errors that depends on metric that was used for clustering (by default SSE - Sum of Squared Errors).
        @details Sum of metric errors is calculated using distance between point and its center:
                 \f[error=\sum_{i=0}^{N}distance(x_{i}-center(x_{i}))\f]

        @return (double) Total within-cluster error.

        @see process()
        @see get_clusters()

        """

        return self.__total_wce


    def get_cluster_encoding(self):
        """!
        @brief Returns clustering result representation type that indicate how clusters are encoded.

        @return (type_encoding) Clustering result representation.

        @see get_clusters()

        """

        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION


    def __update_clusters(self):
        """!
        @brief Calculate distance (in line with specified metric) to each point from the each cluster. Nearest points
                are captured by according clusters and as a result clusters are updated.

        @return (list) Updated clusters as list of clusters. Each cluster contains indexes of objects from data.
                 Clusters without points are not included.

        """

        clusters = [[] for _ in range(len(self.__centers))]

        dataset_differences = self.__calculate_dataset_difference(len(clusters))

        # Rows of the matrix are centers, columns are points: minimum along axis 0 is the nearest center of a point.
        optimum_indexes = numpy.argmin(dataset_differences, axis=0)
        for index_point, index_cluster in enumerate(optimum_indexes):
            clusters[index_cluster].append(index_point)

        return [cluster for cluster in clusters if len(cluster) > 0]


    def __update_centers(self):
        """!
        @brief Calculate centers of clusters in line with contained objects.
        @details Each center is the mean of the points that belong to the corresponding cluster.

        @return (numpy.array) Updated centers.

        """

        dimension = self.__pointer_data.shape[1]
        centers = numpy.zeros((len(self.__clusters), dimension))

        for index, cluster in enumerate(self.__clusters):
            cluster_points = self.__pointer_data[cluster, :]
            centers[index] = cluster_points.mean(axis=0)

        return centers


    def __calculate_total_wce(self):
        """!
        @brief Calculate total within cluster errors that is depend on metric that was chosen for K-Means algorithm.
        @details The error is the sum of distances between each point and the center of the cluster where the point
                  is located.

        """

        dataset_differences = self.__calculate_dataset_difference(len(self.__clusters))

        self.__total_wce = 0.0
        for index_cluster, cluster in enumerate(self.__clusters):
            for index_point in cluster:
                self.__total_wce += dataset_differences[index_cluster][index_point]


    def __calculate_dataset_difference(self, amount_clusters):
        """!
        @brief Calculate distance from each point to each cluster center.

        @param[in] amount_clusters (uint): Amount of centers (the first ones) that are considered.

        @return (numpy.array) Matrix where element [i][j] is a distance between center 'i' and point 'j'.

        """
        dataset_differences = numpy.zeros((amount_clusters, len(self.__pointer_data)))
        for index_center in range(amount_clusters):
            if self.__metric.get_type() != type_metric.USER_DEFINED:
                dataset_differences[index_center] = self.__metric(self.__pointer_data, self.__centers[index_center])
            else:
                dataset_differences[index_center] = [self.__metric(point, self.__centers[index_center])
                                                     for point in self.__pointer_data]

        return dataset_differences


    def __calculate_changes(self, updated_centers):
        """!
        @brief Calculates changes estimation between previous and current iteration using centers for that purpose.

        @param[in] updated_centers (array_like): New cluster centers.

        @return (float) Maximum changes between centers, infinity if amount of centers has been changed.

        """
        if len(self.__centers) != len(updated_centers):
            # Amount of centers differs, so they cannot be compared pairwise and processing should continue.
            return float('inf')

        if self.__metric.get_type() != type_metric.USER_DEFINED:
            changes = self.__metric(self.__centers, updated_centers)
        else:
            changes = [self.__metric(center, updated_center) for center, updated_center in zip(self.__centers, updated_centers)]

        return numpy.max(changes)


    def __verify_arguments(self):
        """!
        @brief Verify input parameters for the algorithm and throw exception in case of incorrectness.

        @exception ValueError Input data is empty, initial centers are empty, tolerance is negative or maximum
                    amount of iterations is negative.

        """
        if len(self.__pointer_data) == 0:
            raise ValueError("Input data is empty (size: '%d')." % len(self.__pointer_data))

        if len(self.__centers) == 0:
            raise ValueError("Initial centers are empty (size: '%d')." % len(self.__centers))

        if self.__tolerance < 0:
            # '%s' is used because tolerance is a floating point value: '%d' would truncate it (-0.5 becomes 0).
            raise ValueError("Tolerance (current value: '%s') should be greater or equal to 0." %
                             self.__tolerance)

        if self.__itermax < 0:
            raise ValueError("Maximum iterations (current value: '%d') should be greater or equal to 0." %
                             self.__itermax)
pyclustering.cluster.kmeans.kmeans.__ccore
__ccore
Definition: kmeans.py:348
pyclustering.cluster.kmeans.kmeans
Class implements K-Means clustering algorithm.
Definition: kmeans.py:253
pyclustering.cluster.cluster_visualizer
Common visualizer of clusters on 1D, 2D or 3D surface.
Definition: __init__.py:370
pyclustering.cluster.kmeans.kmeans.get_clusters
def get_clusters(self)
Returns list of allocated clusters, each cluster contains indexes of objects in list of data.
Definition: kmeans.py:450
pyclustering.cluster.kmeans.kmeans_observer.__initial_centers
__initial_centers
Definition: kmeans.py:45
pyclustering.cluster.kmeans.kmeans.__calculate_changes
def __calculate_changes(self, updated_centers)
Calculates changes estimation between previous and current iteration using centers for that purpose.
Definition: kmeans.py:575
pyclustering.cluster.kmeans.kmeans.predict
def predict(self, points)
Calculates the closest cluster to each point.
Definition: kmeans.py:425
pyclustering.cluster.kmeans.kmeans.__process_by_ccore
def __process_by_ccore(self)
Performs cluster analysis using CCORE (C/C++ part of pyclustering library).
Definition: kmeans.py:377
pyclustering.cluster.kmeans.kmeans_observer.notify
def notify(self, clusters, centers)
This method is called by K-Means algorithm to notify about changes.
Definition: kmeans.py:56
pyclustering.cluster.kmeans.kmeans.process
def process(self)
Performs cluster analysis in line with rules of K-Means algorithm.
Definition: kmeans.py:355
pyclustering.cluster.kmeans.kmeans.__tolerance
__tolerance
Definition: kmeans.py:336
pyclustering.cluster.kmeans.kmeans.__calculate_total_wce
def __calculate_total_wce(self)
Calculate total within cluster errors that is depend on metric that was chosen for K-Means algorithm.
Definition: kmeans.py:545
pyclustering.cluster.kmeans.kmeans_observer.__evolution_centers
__evolution_centers
Definition: kmeans.py:44
pyclustering.cluster.kmeans.kmeans_observer
Observer of K-Means algorithm that is used to collect information about clustering process on each it...
Definition: kmeans.py:30
pyclustering.cluster.kmeans.kmeans_observer.get_clusters
def get_clusters(self, iteration)
Get method to return allocated clusters at specific iteration of clustering process.
Definition: kmeans.py:100
pyclustering.cluster.kmeans.kmeans.__clusters
__clusters
Definition: kmeans.py:334
pyclustering.cluster.kmeans.kmeans_visualizer
Visualizer of K-Means algorithm's results.
Definition: kmeans.py:113
pyclustering.cluster.kmeans.kmeans.__update_clusters
def __update_clusters(self)
Calculate distance (in line with specified metric) to each point from the each cluster.
Definition: kmeans.py:504
pyclustering.cluster.kmeans.kmeans.get_centers
def get_centers(self)
Returns list of centers of allocated clusters.
Definition: kmeans.py:462
pyclustering.cluster.kmeans.kmeans.get_cluster_encoding
def get_cluster_encoding(self)
Returns clustering result representation type that indicate how clusters are encoded.
Definition: kmeans.py:491
pyclustering.cluster.kmeans.kmeans_observer.__init__
def __init__(self)
Initializer of observer of K-Means algorithm.
Definition: kmeans.py:38
pyclustering.cluster.kmeans.kmeans_visualizer.animate_cluster_allocation
def animate_cluster_allocation(data, observer, animation_velocity=500, movie_fps=1, save_movie=None)
Animates clustering process that is performed by K-Means algorithm.
Definition: kmeans.py:213
pyclustering.cluster.kmeans.kmeans.__verify_arguments
def __verify_arguments(self)
Verify input parameters for the algorithm and throw exception in case of incorrectness.
Definition: kmeans.py:598
pyclustering.cluster.kmeans.kmeans.__pointer_data
__pointer_data
Definition: kmeans.py:333
pyclustering.cluster
pyclustering module for cluster analysis.
Definition: __init__.py:1
pyclustering.cluster.kmeans.kmeans_visualizer.show_clusters
def show_clusters(sample, clusters, centers, initial_centers=None, **kwargs)
Display K-Means clustering results.
Definition: kmeans.py:125
pyclustering.cluster.kmeans.kmeans.__process_by_python
def __process_by_python(self)
Performs cluster analysis using python code.
Definition: kmeans.py:397
pyclustering.utils.metric.distance_metric
Distance metric performs distance calculation between two points in line with encapsulated function,...
Definition: metric.py:52
pyclustering.cluster.kmeans.kmeans.__observer
__observer
Definition: kmeans.py:339
pyclustering.cluster.kmeans.kmeans_observer.get_centers
def get_centers(self, iteration)
Get method to return centers at specific iteration of clustering process.
Definition: kmeans.py:78
pyclustering.cluster.kmeans.kmeans.__centers
__centers
Definition: kmeans.py:335
pyclustering.cluster.kmeans.kmeans.__metric
__metric
Definition: kmeans.py:340
pyclustering.cluster.kmeans.kmeans_observer.__len__
def __len__(self)
Returns amount of steps that were observed during clustering process in K-Means algorithm.
Definition: kmeans.py:48
pyclustering.cluster.kmeans.kmeans_observer.set_evolution_centers
def set_evolution_centers(self, evolution_centers)
Set evolution of changes of centers during clustering process.
Definition: kmeans.py:68
pyclustering.cluster.kmeans.kmeans.__itermax
__itermax
Definition: kmeans.py:341
pyclustering.cluster.kmeans.kmeans.get_total_wce
def get_total_wce(self)
Returns sum of metric errors that depends on metric that was used for clustering (by default SSE - Su...
Definition: kmeans.py:477
pyclustering.cluster.kmeans.kmeans.__init__
def __init__(self, data, initial_centers, tolerance=0.001, ccore=True, **kwargs)
Constructor of clustering algorithm K-Means.
Definition: kmeans.py:314
pyclustering.cluster.kmeans.kmeans.__total_wce
__total_wce
Definition: kmeans.py:337
pyclustering.cluster.kmeans.kmeans_observer.set_evolution_clusters
def set_evolution_clusters(self, evolution_clusters)
Set evolution of changes of clusters during clustering process.
Definition: kmeans.py:90
pyclustering.cluster.kmeans.kmeans_observer.__evolution_clusters
__evolution_clusters
Definition: kmeans.py:43
pyclustering.cluster.kmeans.kmeans.__calculate_dataset_difference
def __calculate_dataset_difference(self, amount_clusters)
Calculate distance from each point to each cluster center.
Definition: kmeans.py:559
pyclustering.cluster.encoder
Module for representing clustering results.
Definition: encoder.py:1
pyclustering.utils.metric
Module provides various distance metrics - abstraction of the notion of distance in a metric space.
Definition: metric.py:1
pyclustering.cluster.kmeans.kmeans.__update_centers
def __update_centers(self)
Calculate centers of clusters in line with contained objects.
Definition: kmeans.py:527