bang.py
1 """!
2 
3 @brief Cluster analysis algorithm: BANG.
4 @details Implementation based on paper @cite inproceedings::bang::1.
5 
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2018
8 @copyright GNU Public License
9 
10 @cond GNU_PUBLIC_LICENSE
11  PyClustering is free software: you can redistribute it and/or modify
12  it under the terms of the GNU General Public License as published by
13  the Free Software Foundation, either version 3 of the License, or
14  (at your option) any later version.
15 
16  PyClustering is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  GNU General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23 @endcond
24 
25 """
26 
27 import itertools
28 import warnings
29 
30 try:
31  import matplotlib
32  import matplotlib.gridspec as gridspec
33  import matplotlib.pyplot as plt
34  import matplotlib.patches as patches
35  import matplotlib.animation as animation
36 except Exception as error_instance:
37  warnings.warn("Impossible to import matplotlib (please, install 'matplotlib'), pyclustering's visualization "
38  "functionality is not available (details: '%s')." % str(error_instance))
39 
40 from pyclustering.cluster import cluster_visualizer
41 from pyclustering.cluster.encoder import type_encoding
42 
43 from pyclustering.utils import data_corners
44 from pyclustering.utils.color import color as color_list
45 
46 
47 
49  """!
50  @brief Visualizer of BANG algorithm's results.
51  @details BANG visualizer provides visualization services that are specific for BANG algorithm.
52 
53  """
54 
55 
56  __maximum_density_alpha = 0.6
57 
58 
59  @staticmethod
60  def show_blocks(directory):
61  """!
62  @brief Show BANG-blocks (leafs only) in data space.
63  @details BANG-blocks represents grid that was used for clustering process.
64 
65  @param[in] directory (bang_directory): Directory that was created by BANG algorithm during clustering process.
66 
67  """
68 
69  dimension = len(directory.get_data()[0])
70 
71  amount_canvases = 1
72  if dimension > 1:
73  amount_canvases = int(dimension * (dimension - 1) / 2)
74 
75  figure = plt.figure()
76  grid_spec = gridspec.GridSpec(1, amount_canvases)
77 
78  pairs = list(itertools.combinations(range(dimension), 2))
79  if len(pairs) == 0: pairs = [(0, 0)]
80 
81  for index in range(amount_canvases):
82  ax = figure.add_subplot(grid_spec[index])
83  bang_visualizer.__draw_blocks(ax, directory.get_leafs(), pairs[index])
84  bang_visualizer.__draw_two_dimension_data(ax, directory.get_data(), pairs[index])
85 
86  plt.show()
87 
88 
89  @staticmethod
90  def show_dendrogram(dendrogram):
91  """!
92  @brief Display dendrogram of BANG-blocks.
93 
94  @param[in] dendrogram (list): List representation of dendrogram of BANG-blocks.
95 
96  @see bang.get_dendrogram()
97 
98  """
99  plt.figure()
100  axis = plt.subplot(1, 1, 1)
101 
102  current_position = 0
103  for index_cluster in range(len(dendrogram)):
104  densities = [ block.get_density() for block in dendrogram[index_cluster] ]
105  xrange = range(current_position, current_position + len(densities))
106 
107  axis.bar(xrange, densities, 1.0, linewidth=0.0, color=color_list.get_color(index_cluster))
108 
109  current_position += len(densities)
110 
111  axis.set_ylabel("density")
112  axis.set_xlabel("block")
113  axis.xaxis.set_ticklabels([])
114 
115  plt.xlim([-0.5, current_position - 0.5])
116  plt.show()
117 
118 
119  @staticmethod
120  def show_clusters(data, clusters, noise=None):
121  """!
122  @brief Display K-Means clustering results.
123 
124  @param[in] data (list): Dataset that was used for clustering.
125  @param[in] clusters (array_like): Clusters that were allocated by the algorithm.
126  @param[in] noise (array_like): Noise that were allocated by the algorithm.
127 
128  """
129  visualizer = cluster_visualizer()
130  visualizer.append_clusters(clusters, data)
131  visualizer.append_cluster(noise or [], data, marker='x')
132  visualizer.show()
133 
134 
135  @staticmethod
136  def __draw_two_dimension_data(ax, data, pair):
137  """!
138  @brief Display data in two-dimensional canvas.
139 
140  @param[in] ax (Axis): Canvas where data should be displayed.
141  @param[in] data (list): Data points that should be displayed.
142  @param[in] pair (tuple): Pair of dimension indexes.
143 
144  """
145  ax.set_xlabel("x%d" % pair[0])
146  ax.set_ylabel("x%d" % pair[1])
147 
148  for point in data:
149  if len(data[0]) > 1:
150  ax.plot(point[pair[0]], point[pair[1]], color='red', marker='.')
151  else:
152  ax.plot(point[pair[0]], 0, color='red', marker='.')
153  ax.yaxis.set_ticklabels([])
154 
155 
156  @staticmethod
157  def __draw_blocks(ax, blocks, pair):
158  """!
159  @brief Display BANG-blocks on specified figure.
160 
161  @param[in] ax (Axis): Axis where bang-blocks should be displayed.
162  @param[in] blocks (list): List of blocks that should be displyed.
163  @param[in] pair (tuple): Pair of coordinate index that should be displayed.
164 
165  """
166  ax.grid(False)
167 
168  density_scale = blocks[-1].get_density()
169  for block in blocks:
170  bang_visualizer.__draw_block(ax, pair, block, density_scale)
171 
172 
173  @staticmethod
174  def __draw_block(ax, pair, block, density_scale):
175  """!
176  @brief Display BANG-block on the specified ax.
177 
178  @param[in] ax (Axis): Axis where block should be displayed.
179  @param[in] pair (tuple): Pair of coordinate index that should be displayed.
180  @param[in] block (bang_block): BANG-block that should be displayed.
181  @param[in] density_scale (double): Max density to display density of the block by appropriate tone.
182 
183  """
184  max_corner, min_corner = bang_visualizer.__get_rectangle_description(block, pair)
185 
186  belong_cluster = block.get_cluster() is not None
187 
188  if density_scale != 0.0:
189  density_scale = bang_visualizer.__maximum_density_alpha * block.get_density() / density_scale
190 
191  face_color = matplotlib.colors.to_rgba('blue', alpha=density_scale)
192  edge_color = matplotlib.colors.to_rgba('black', alpha=1.0)
193 
194  rect = patches.Rectangle(min_corner, max_corner[0] - min_corner[0], max_corner[1] - min_corner[1],
195  fill=belong_cluster,
196  facecolor=face_color,
197  edgecolor=edge_color,
198  linewidth=0.5)
199  ax.add_patch(rect)
200 
201 
202  @staticmethod
203  def __get_rectangle_description(block, pair):
204  """!
205  @brief Create rectangle description for block in specific dimension.
206 
207  @param[in] pair (tuple): Pair of coordinate index that should be displayed.
208  @param[in] block (bang_block): BANG-block that should be displayed
209 
210  @return (tuple) Pair of corners that describes rectangle.
211 
212  """
213  max_corner, min_corner = block.get_spatial_block().get_corners()
214 
215  max_corner = [max_corner[pair[0]], max_corner[pair[1]]]
216  min_corner = [min_corner[pair[0]], min_corner[pair[1]]]
217 
218  if pair == (0, 0):
219  max_corner[1], min_corner[1] = 1.0, -1.0
220 
221  return max_corner, min_corner
222 
223 
225  """!
226  @brief Provides service for creating 2-D animation using BANG clustering results.
227  @details The animator does not support visualization of clustering process where non 2-dimensional was used.
228 
229  Code example of animation of BANG clustering process:
230  @code
231  from pyclustering.cluster.bang import bang, bang_animator
232  from pyclustering.utils import read_sample
233  from pyclustering.samples.definitions import FCPS_SAMPLES
234 
235  # Read data two dimensional data.
236  data = read_sample(FCPS_SAMPLES.SAMPLE_LSUN)
237 
238  # Create instance of BANG algorithm.
239  bang_instance = bang(data, 9)
240  bang_instance.process()
241 
242  # Obtain clustering results.
243  clusters = bang_instance.get_clusters()
244  noise = bang_instance.get_noise()
245  directory = bang_instance.get_directory()
246 
247  # Create BANG animation using class 'bang_animator':
248  animator = bang_animator(directory, clusters, noise)
249  animator.animate()
250  @endcode
251 
252 
253  """
254  def __init__(self, directory, clusters):
255  """!
256  @brief Creates BANG animator instance.
257 
258  @param[in] directory (bang_directory): BANG directory that was formed during BANG clustering process.
259  @param[in] clusters (list): Allocated clusters during BANG clustering process.
260 
261  """
262  self.__directory = directory
263  self.__clusters = clusters
264  self.__noise = []
265 
266  self.__current_block = 0
267  self.__current_level = 0
268  self.__level_blocks = directory.get_level(0)
269 
270  self.__figure = plt.figure()
271  self.__ax = self.__figure.add_subplot(1, 1, 1)
272  self.__special_frame = 0
273 
274  self.__validate_arguments()
275 
276 
277  def __validate_arguments(self):
278  """!
279  @brief Check correctness of input arguments and throw exception if incorrect is found.
280 
281  """
282  if len(self.__directory.get_data()[0]) != 2:
283  raise ValueError("Impossible to animate BANG clustering process for non 2D data.")
284 
285 
286  def __increment_block(self):
287  """!
288  @brief Increment BANG block safely by updating block index, level and level block.
289 
290  """
291  self.__current_block += 1
292  if self.__current_block >= len(self.__level_blocks):
293  self.__current_block = 0
294  self.__current_level += 1
295 
296  if self.__current_level < self.__directory.get_height():
297  self.__level_blocks = self.__directory.get_level(self.__current_level)
298 
299 
300  def __draw_block(self, block, block_alpha=0.0):
301  """!
302  @brief Display single BANG block on axis.
303 
304  @param[in] block (bang_block): BANG block that should be displayed.
305  @param[in] block_alpha (double): Transparency level - value of alpha.
306 
307  """
308  max_corner, min_corner = block.get_spatial_block().get_corners()
309 
310  face_color = matplotlib.colors.to_rgba('blue', alpha=block_alpha)
311  edge_color = matplotlib.colors.to_rgba('black', alpha=1.0)
312 
313  rect = patches.Rectangle(min_corner, max_corner[0] - min_corner[0], max_corner[1] - min_corner[1],
314  fill=True,
315  facecolor=face_color,
316  edgecolor=edge_color,
317  linewidth=0.5)
318  self.__ax.add_patch(rect)
319 
320 
321  def __draw_leaf_density(self):
322  """!
323  @brief Display densities by filling blocks by appropriate colors.
324 
325  """
326  leafs = self.__directory.get_leafs()
327  density_scale = leafs[-1].get_density()
328 
329  if density_scale == 0.0: density_scale = 1.0
330 
331  for block in leafs:
332  alpha = 0.8 * block.get_density() / density_scale
333  self.__draw_block(block, alpha)
334 
335 
336  def __draw_clusters(self):
337  """!
338  @brief Display clusters and outliers using different colors.
339 
340  """
341  data = self.__directory.get_data()
342  for index_cluster in range(len(self.__clusters)):
343  color = color_list.get_color(index_cluster)
344  self.__draw_cluster(data, self.__clusters[index_cluster], color, '.')
345 
346  self.__draw_cluster(self.__directory.get_data(), self.__noise, 'gray', 'x')
347 
348 
349  def __draw_cluster(self, data, cluster, color, marker):
350  """!
351  @brief Draw 2-D single cluster on axis using specified color and marker.
352 
353  """
354  for item in cluster:
355  self.__ax.plot(data[item][0], data[item][1], color=color, marker=marker)
356 
357 
358  def animate(self, animation_velocity=75, movie_fps=25, movie_filename=None):
359  """!
360  @brief Animates clustering process that is performed by BANG algorithm.
361 
362  @param[in] animation_velocity (uint): Interval between frames in milliseconds (for run-time animation only).
363  @param[in] movie_fps (uint): Defines frames per second (for rendering movie only).
364  @param[in] movie_filename (string): If it is specified then animation will be stored to file that is specified in this parameter.
365 
366  """
367  def init_frame():
368  self.__figure.clf()
369  self.__ax = self.__figure.add_subplot(1, 1, 1)
370  self.__figure.suptitle("BANG algorithm", fontsize=18, fontweight='bold')
371 
372  for point in self.__directory.get_data():
373  self.__ax.plot(point[0], point[1], color='red', marker='.')
374 
375  return frame_generation(0)
376 
377 
378  def frame_generation(index_iteration):
379  if self.__current_level < self.__directory.get_height():
380  block = self.__level_blocks[self.__current_block]
381  self.__draw_block(block)
382  self.__increment_block()
383 
384  else:
385  if self.__special_frame == 0:
386  self.__draw_leaf_density()
387 
388  elif self.__special_frame == 15:
389  self.__draw_clusters()
390 
391  elif self.__special_frame == 30:
392  self.__figure.clf()
393  self.__ax = self.__figure.add_subplot(1, 1, 1)
394  self.__figure.suptitle("BANG algorithm", fontsize=18, fontweight='bold')
395 
396  self.__draw_clusters()
397 
398  self.__special_frame += 1
399 
400 
401 
402  iterations = len(self.__directory) + 60
403  # print("Total number of iterations: %d" % iterations)
404  cluster_animation = animation.FuncAnimation(self.__figure, frame_generation, iterations,
405  interval=animation_velocity,
406  init_func=init_frame,
407  repeat_delay=5000)
408 
409  if movie_filename is not None:
410  cluster_animation.save(movie_filename, writer = 'ffmpeg', fps = movie_fps, bitrate = 3500)
411  else:
412  plt.show()
413 
414 
415 
417  """!
418  @brief BANG directory stores BANG-blocks that represents grid in data space.
419  @details The directory build BANG-blocks in binary tree manner. Leafs of the tree stored separately to provide
420  a direct access to the leafs that should be analysed. Leafs cache data-points.
421 
422  """
423  def __init__(self, data, levels, **kwargs):
424  """!
425  @brief Create BANG directory - basically tree structure with direct access to leafs.
426 
427  @param[in] data (list): Input data that is clustered.
428  @param[in] levels (uint): Height of the blocks tree.
429  @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'observe').
430 
431  <b>Keyword Args:</b><br>
432  - observe (bool): If 'True' then blocks on each level are stored.
433  - density_threshold (double): The lowest level of density when contained data in bang-block is
434  considered as a noise and there is no need to split it till the last level. Be aware that this
435  parameter is used with 'amount_threshold' parameter.
436  - amount_threshold (uint): Amount of points in the block when it contained data in bang-block is
437  considered as a noise and there is no need to split it till the last level.
438 
439  """
440  self.__data = data
441  self.__levels = levels
442  self.__density_threshold = kwargs.get('density_threshold', 0.0)
443  self.__amount_density = kwargs.get('amount_threshold', 0)
444  self.__leafs = []
445  self.__root = None
446  self.__level_blocks = []
447  self.__size = 0
448  self.__observe = kwargs.get('observe', True)
449 
450  self.__create_directory()
451 
452 
453  def __len__(self):
454  """!
455  @brief Returns amount of blocks that is stored in the directory
456 
457  @return (uint) Amount of blocks in the BANG directory.
458 
459  """
460  return self.__size
461 
462 
463  def get_data(self):
464  """!
465  @brief Return data that is stored in the directory.
466 
467  @return (list) List of points that represents stored data.
468 
469  """
470  return self.__data
471 
472 
473  def get_leafs(self):
474  """!
475  @brief Return leafs - the smallest blocks.
476  @details Some leafs can be bigger than others because splitting is not performed for blocks whose density is
477  less than threshold.
478 
479  @return (list) List of blocks that are leafs of BANG directory.
480 
481  """
482  return self.__leafs
483 
484 
485  def get_level(self, level):
486  """!
487  @brief Returns BANG blocks on the specific level.
488 
489  @param[in] level (uint): Level of tree where BANG blocks are located.
490 
491  @return (list) List of BANG blocks on the specific level.
492 
493  """
494  return self.__level_blocks[level]
495 
496 
497  def get_height(self):
498  """!
499  @brief Returns height of BANG tree where blocks are stored.
500 
501  @return (uint) Height of BANG tree.
502 
503  """
504  return len(self.__level_blocks)
505 
506 
507  def __create_directory(self):
508  """!
509  @brief Create BANG directory as a tree with separate storage for leafs.
510 
511  """
512 
513  min_corner, max_corner = data_corners(self.__data)
514  data_block = spatial_block(max_corner, min_corner)
515 
516  cache_require = (self.__levels == 1)
517  self.__root = bang_block(self.__data, 0, 0, data_block, cache_require)
518 
519  if cache_require:
520  self.__leafs.append(self.__root)
521  self.__store_level_blocks([self.__root])
522  else:
524 
525 
526  def __store_level_blocks(self, level_blocks):
527  """!
528  @brief Store level blocks if observing is enabled.
529 
530  @param[in] level_blocks (list): Created blocks on a new level.
531 
532  """
533  self.__size += len(level_blocks)
534  if self.__observe is True:
535  self.__level_blocks.append(level_blocks)
536 
537 
538 
539  def __build_directory_levels(self):
540  """!
541  @brief Build levels of direction if amount of level is greater than one.
542 
543  """
544 
545  previous_level_blocks = [ self.__root ]
546 
547  for level in range(1, self.__levels):
548  previous_level_blocks = self.__build_level(previous_level_blocks, level)
549  self.__store_level_blocks(previous_level_blocks)
550 
551  self.__leafs = sorted(self.__leafs, key=lambda block: block.get_density())
552 
553 
554  def __build_level(self, previous_level_blocks, level):
555  """!
556  @brief Build new level of directory.
557 
558  @param[in] previous_level_blocks (list): BANG-blocks on the previous level.
559  @param[in] level (uint): Level number that should be built.
560 
561  @return (list) New block on the specified level.
562 
563  """
564  current_level_blocks = []
565 
566  split_dimension = level % len(self.__data[0])
567  cache_require = (level == self.__levels - 1)
568 
569  for block in previous_level_blocks:
570  self.__split_block(block, split_dimension, cache_require, current_level_blocks)
571 
572  if cache_require:
573  self.__leafs += current_level_blocks
574 
575  return current_level_blocks
576 
577 
578  def __split_block(self, block, split_dimension, cache_require, current_level_blocks):
579  """!
580  @brief Split specific block in specified dimension.
581  @details Split is not performed for block whose density is lower than threshold value, such blocks are putted to
582  leafs.
583 
584  @param[in] block (bang_block): BANG-block that should be split.
585  @param[in] split_dimension (uint): Dimension at which splitting should be performed.
586  @param[in] cache_require (bool): Defines when points in cache should be stored during density calculation.
587  @param[in|out] current_level_blocks (list): Block storage at the current level where new blocks should be added.
588 
589  """
590  if block.get_density() <= self.__density_threshold or len(block) <= self.__amount_density:
591  self.__leafs.append(block)
592 
593  else:
594  left, right = block.split(split_dimension, cache_require)
595  current_level_blocks.append(left)
596  current_level_blocks.append(right)
597 
598 
599 
601  """!
602  @brief Geometrical description of BANG block in data space.
603  @details Provides services related to spatial function and used by bang_block
604 
605  @see bang_block
606 
607  """
608 
609  def __init__(self, max_corner, min_corner):
610  """!
611  @brief Creates spatial block in data space.
612 
613  @param[in] max_corner (array_like): Maximum corner coordinates of the block.
614  @param[in] min_corner (array_like): Minimal corner coordinates of the block.
615 
616  """
617  self.__max_corner = max_corner
618  self.__min_corner = min_corner
619  self.__volume = self.__calculate_volume()
620 
621 
622  def __str__(self):
623  """!
624  @brief Returns string block description.
625 
626  @return String representation of the block.
627 
628  """
629  return "(max: %s; min: %s)" % (self.__max_corner, self.__min_corner)
630 
631 
632  def __contains__(self, point):
633  """!
634  @brief Point is considered as contained if it lies in block (belong to it).
635 
636  @return (bool) True if point is in block, otherwise False.
637 
638  """
639  for i in range(len(point)):
640  if point[i] < self.__min_corner[i] or point[i] > self.__max_corner[i]:
641  return False
642 
643  return True
644 
645 
646  def get_corners(self):
647  """!
648  @brief Return spatial description of current block.
649 
650  @return (tuple) Pair of maximum and minimum corners (max_corner, min_corner).
651 
652  """
653  return self.__max_corner, self.__min_corner
654 
655 
656  def get_volume(self):
657  """!
658  @brief Returns volume of current block.
659  @details Volume block has uncommon mining here: for 1D is length of a line, for 2D is square of rectangle,
660  for 3D is volume of 3D figure, and for ND is volume of ND figure.
661 
662  @return (double) Volume of current block.
663 
664  """
665  return self.__volume
666 
667 
668  def split(self, dimension):
669  """!
670  @brief Split current block into two spatial blocks in specified dimension.
671 
672  @param[in] dimension (uint): Dimension where current block should be split.
673 
674  @return (tuple) Pair of new split blocks from current block.
675 
676  """
677  first_max_corner = self.__max_corner[:]
678  second_min_corner = self.__min_corner[:]
679 
680  split_border = (self.__max_corner[dimension] + self.__min_corner[dimension]) / 2.0
681 
682  first_max_corner[dimension] = split_border
683  second_min_corner[dimension] = split_border
684 
685  return spatial_block(first_max_corner, self.__min_corner), spatial_block(self.__max_corner, second_min_corner)
686 
687 
688  def is_neighbor(self, block):
689  """!
690  @brief Performs calculation to identify whether specified block is neighbor of current block.
691 
692  @param[in] block (spatial_block): Another block that is check whether it is neighbor.
693 
694  @return (bool) True is blocks are neighbors, False otherwise.
695 
696  """
697  if block is not self:
698  block_max_corner, _ = block.get_corners()
699  dimension = len(block_max_corner)
700  neighborhood_score = self.__calculate_neighborhood(block_max_corner)
701 
702  if neighborhood_score == dimension:
703  return True
704 
705  return False
706 
707 
708  def __calculate_neighborhood(self, block_max_corner):
709  """!
710  @brief Calculates neighborhood score that defined whether blocks are neighbors.
711 
712  @param[in] block_max_corner (list): Maximum coordinates of other block.
713 
714  @return (uint) Neighborhood score.
715 
716  """
717  dimension = len(block_max_corner)
718 
719  length_edges = [self.__max_corner[i] - self.__min_corner[i] for i in range(dimension)]
720 
721  neighborhood_score = 0
722  for i in range(dimension):
723  diff = abs(block_max_corner[i] - self.__max_corner[i])
724 
725  if diff <= length_edges[i] + length_edges[i] * 0.0001:
726  neighborhood_score += 1
727 
728  return neighborhood_score
729 
730 
731  def __calculate_volume(self):
732  """!
733  @brief Calculates volume of current spatial block.
734  @details If empty dimension is detected (where all points has the same value) then such dimension is ignored
735  during calculation of volume.
736 
737  @return (double) Volume of current spatial block.
738 
739  """
740 
741  volume = 0.0
742  for i in range(0, len(self.__max_corner)):
743  side_length = self.__max_corner[i] - self.__min_corner[i]
744 
745  if side_length != 0.0:
746  if volume == 0.0: volume = side_length
747  else: volume *= side_length
748 
749  return volume
750 
751 
753  """!
754  @brief BANG-block that represent spatial region in data space.
755 
756  """
757  def __init__(self, data, region, level, space_block, cache_points=False):
758  """!
759  @brief Create BANG-block.
760 
761  @param[in] data (list): List of points that are processed.
762  @param[in] region (uint): Region number - unique value on a level.
763  @param[in] level (uint): Level number where block is created.
764  @param[in] space_block (spatial_block): Spatial block description in data space.
765  @param[in] cache_points (bool): if True then points are stored in memory (used for leaf blocks).
766 
767  """
768  self.__data = data
769  self.__region_number = region
770  self.__level = level
771  self.__spatial_block = space_block
772  self.__cache_points = cache_points
773 
774  self.__cluster = None
775  self.__points = None
778 
779 
780  def __str__(self):
781  """!
782  @brief Returns string representation of BANG-block using region number and level where block is located.
783 
784  """
785  return "(" + str(self.__region_number) + ", " + str(self.__level) + ")"
786 
787 
788  def __len__(self):
789  """!
790  @brief Returns block size defined by amount of points that are contained by this block.
791 
792  """
793  return self.__amount_points
794 
795 
796  def get_region(self):
797  """!
798  @brief Returns region number of BANG-block.
799  @details Region number is unique on among region numbers on a directory level. Pair of region number and level
800  is unique for all directory.
801 
802  @return (uint) Region number.
803 
804  """
805  return self.__region_number
806 
807 
808  def get_density(self):
809  """!
810  @brief Returns density of the BANG-block.
811 
812  @return (double) BANG-block density.
813 
814  """
815  return self.__density
816 
817 
818  def get_cluster(self):
819  """!
820  @brief Return index of cluster to which the BANG-block belongs to.
821  @details Index of cluster may have None value if the block was not assigned to any cluster.
822 
823  @return (uint) Index of cluster or None if the block does not belong to any cluster.
824 
825  """
826  return self.__cluster
827 
828 
829  def get_spatial_block(self):
830  """!
831  @brief Return spatial block - BANG-block description in data space.
832 
833  @return (spatial_block) Spatial block of the BANG-block.
834 
835  """
836  return self.__spatial_block
837 
838 
839  def get_points(self):
840  """!
841  @brief Return points that covers by the BANG-block.
842 
843  @return (list) List of point indexes that are covered by the block.
844 
845  """
846  if self.__points is None:
847  self.__cache_covered_data()
848 
849  return self.__points
850 
851 
852  def set_cluster(self, index):
853  """!
854  @brief Assign cluster to the BANG-block by index.
855 
856  @param[in] index (uint): Index cluster that is assigned to BANG-block.
857 
858  """
859  self.__cluster = index
860 
861 
862  def is_neighbor(self, block):
863  """!
864  @brief Performs calculation to check whether specified block is neighbor to the current.
865 
866  @param[in] block (bang_block): Other BANG-block that should be checked for neighborhood.
867 
868  @return (bool) True if blocks are neighbors, False if blocks are not neighbors.
869 
870  """
871  return self.get_spatial_block().is_neighbor(block.get_spatial_block())
872 
873 
874  def split(self, split_dimension, cache_points):
875  """!
876  @brief Split BANG-block into two new blocks in specified dimension.
877 
878  @param[in] split_dimension (uint): Dimension where block should be split.
879  @param[in] cache_points (bool): If True then covered points are cached. Used for leaf blocks.
880 
881  @return (tuple) Pair of BANG-block that were formed from the current.
882 
883  """
884  left_region_number = self.__region_number
885  right_region_number = self.__region_number + 2 ** self.__level
886 
887  first_spatial_block, second_spatial_block = self.__spatial_block.split(split_dimension)
888 
889  left = bang_block(self.__data, left_region_number, self.__level + 1, first_spatial_block, cache_points)
890  right = bang_block(self.__data, right_region_number, self.__level + 1, second_spatial_block, cache_points)
891 
892  return left, right
893 
894 
895  def __calculate_density(self, amount_points):
896  """!
897  @brief Calculates BANG-block density.
898 
899  @param[in] amount_points (uint): Amount of points in block.
900 
901  @return (double) BANG-block density.
902 
903  """
904  volume = self.__spatial_block.get_volume()
905  if volume != 0.0:
906  return amount_points / volume
907 
908  return 0.0
909 
910 
911  def __get_amount_points(self):
912  """!
913  @brief Count covered points by the BANG-block and if cache is enable then covered points are stored.
914 
915  @return (uint) Amount of covered points.
916 
917  """
918  amount = 0
919  for index in range(len(self.__data)):
920  if self.__data[index] in self.__spatial_block:
921  self.__cache_point(index)
922  amount += 1
923 
924  return amount
925 
926 
927  def __cache_covered_data(self):
928  """!
929  @brief Cache covered data.
930 
931  """
932  self.__cache_points = True
933  self.__points = []
934 
935  for index_point in range(len(self.__data)):
936  if self.__data[index_point] in self.__spatial_block:
937  self.__cache_point(index_point)
938 
939 
940  def __cache_point(self, index):
941  """!
942  @brief Store index points.
943 
944  @param[in] index (uint): Index point that should be stored.
945 
946  """
947  if self.__cache_points:
948  if self.__points is None:
949  self.__points = []
950 
951  self.__points.append(index)
952 
953 
954 
955 class bang:
956  """!
957  @brief Class implements BANG grid based clustering algorithm.
958  @details BANG clustering algorithms uses a multidimensional grid structure to organize the value space surrounding
959  the pattern values. The patterns are grouped into blocks and clustered with respect to the blocks by
960  a topological neighbor search algorithm @cite inproceedings::bang::1.
961 
962  Code example of BANG usage:
963  @code
964  from pyclustering.cluster.bang import bang, bang_visualizer
965  from pyclustering.utils import read_sample
966  from pyclustering.samples.definitions import FCPS_SAMPLES
967 
968  # Read data three dimensional data.
969  data = read_sample(FCPS_SAMPLES.SAMPLE_CHAINLINK)
970 
971  # Prepare algorithm's parameters.
972  levels = 11
973 
974  # Create instance of BANG algorithm.
975  bang_instance = bang(data, levels)
976  bang_instance.process()
977 
978  # Obtain clustering results.
979  clusters = bang_instance.get_clusters()
980  noise = bang_instance.get_noise()
981  directory = bang_instance.get_directory()
982  dendrogram = bang_instance.get_dendrogram()
983 
984  # Visualize BANG clustering results.
985  bang_visualizer.show_blocks(directory)
986  bang_visualizer.show_dendrogram(dendrogram)
987  bang_visualizer.show_clusters(data, clusters, noise)
988  @endcode
989 
990  There is visualization of BANG-clustering of three-dimensional data 'chainlink'. BANG-blocks that were formed during
991  processing are shown on following figure. The darkest color means highest density, blocks that does not cover points
992  are transparent:
993  @image html bang_blocks_chainlink.png "Fig. 1. BANG-blocks that cover input data."
994 
995  Here is obtained dendrogram that can be used for further analysis to improve clustering results:
996  @image html bang_dendrogram_chainlink.png "Fig. 2. BANG dendrogram where the X-axis contains BANG-blocks, the Y-axis contains density."
997 
998  BANG clustering result of 'chainlink' data:
999  @image html bang_clustering_chainlink.png "Fig. 3. BANG clustering result. Data: 'chainlink'."
1000 
1001  """
1002 
1003  def __init__(self, data, levels, ccore=False, **kwargs):
1004  """!
1005  @brief Create BANG clustering algorithm.
1006 
1007  @param[in] data (list): Input data (list of points) that should be clustered.
1008  @param[in] levels (uint): Amount of levels in tree that is used for splitting (how many times block should be
1009  split). For example, if amount of levels is two then surface will be divided into two blocks and
1010  each obtained block will be divided into blocks also.
1011  @param[in] ccore (bool): Reserved positional argument - not used yet.
1012  @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'observe').
1013 
1014  <b>Keyword Args:</b><br>
1015  - density_threshold (double): If block density is smaller than this value then contained data by this
1016  block is considered as a noise and its points as outliers. Block density is defined by amount of
1017  points in block divided by block volume: <i>amount_block_points</i>/<i>block_volume</i>. By default
1018  it is 0.0 - means than only empty blocks are considered as noise. Be aware that this parameter is used
1019  with parameter 'amount_threshold' - the maximum threshold is considered during processing.
1020  - amount_threshold (uint): Amount of points in the block when it contained data in bang-block is
1021  considered as a noise and there is no need to split it till the last level. Be aware that this parameter
1022  is used with parameter 'density_threshold' - the maximum threshold is considered during processing.
1023 
1024  """
1025  self.__data = data
1026  self.__levels = levels
1027  self.__directory = None
1028  self.__clusters = []
1029  self.__noise = []
1030  self.__cluster_blocks = []
1031  self.__dendrogram = []
1032  self.__density_threshold = kwargs.get('density_threshold', 0.0)
1033  self.__amount_threshold = kwargs.get('amount_threshold', 0)
1034  self.__ccore = ccore
1035 
1036  self.__validate_arguments()
1037 
1038 
1039  def process(self):
1040  """!
1041  @brief Performs clustering process in line with rules of BANG clustering algorithm.
1042 
1043  @see get_clusters()
1044  @see get_noise()
1045  @see get_directory()
1046  @see get_dendrogram()
1047 
1048  """
1049  self.__directory = bang_directory(self.__data, self.__levels,
1050  density_threshold=self.__density_threshold,
1051  amount_threshold=self.__amount_threshold)
1052  self.__allocate_clusters()
1053 
1054 
1055  def get_clusters(self):
1056  """!
1057  @brief Returns allocated clusters.
1058 
1059  @remark Allocated clusters are returned only after data processing (method process()). Otherwise empty list is returned.
1060 
1061  @return (list) List of allocated clusters, each cluster contains indexes of objects in list of data.
1062 
1063  @see process()
1064  @see get_noise()
1065 
1066  """
1067  return self.__clusters
1068 
1069 
1070  def get_noise(self):
1071  """!
1072  @brief Returns allocated noise.
1073 
1074  @remark Allocated noise is returned only after data processing (method process()). Otherwise empty list is returned.
1075 
1076  @return (list) List of indexes that are marked as a noise.
1077 
1078  @see process()
1079  @see get_clusters()
1080 
1081  """
1082  return self.__noise
1083 
1084 
1085  def get_directory(self):
1086  """!
1087  @brief Returns grid directory that describes grid of the processed data.
1088 
1089  @remark Grid directory is returned only after data processing (method process()). Otherwise None value is returned.
1090 
1091  @return (bang_directory) BANG directory that describes grid of process data.
1092 
1093  @see process()
1094 
1095  """
1096  return self.__directory
1097 
1098 
1099  def get_dendrogram(self):
1100  """!
1101  @brief Returns dendrogram of clusters.
1102  @details Dendrogram is created in following way: the density indices of all regions are calculated and sorted
1103  in decreasing order for each cluster during clustering process.
1104 
1105  @remark Dendrogram is returned only after data processing (method process()). Otherwise empty list is returned.
1106 
1107  """
1108  return self.__dendrogram
1109 
1110 
1112  """!
1113  @brief Returns clustering result representation type that indicate how clusters are encoded.
1114 
1115  @return (type_encoding) Clustering result representation.
1116 
1117  @see get_clusters()
1118 
1119  """
1120 
1121  return type_encoding.CLUSTER_INDEX_LIST_SEPARATION
1122 
1123 
1124  def __validate_arguments(self):
1125  """!
1126  @brief Check input arguments of BANG algorithm and if one of them is not correct then appropriate exception
1127  is thrown.
1128 
1129  """
1130  if self.__levels <= 0:
1131  raise ValueError("Incorrect amount of levels '%d'. Level value should be greater than 0." % self.__levels)
1132 
1133  if len(self.__data) == 0:
1134  raise ValueError("Empty input data. Data should contain at least one point.")
1135 
1136  if self.__density_threshold < 0:
1137  raise ValueError("Incorrect density threshold '%f'. Density threshold should not be negative." % self.__density_threshold)
1138 
1139 
1140  def __allocate_clusters(self):
1141  """!
1142  @brief Performs cluster allocation using leafs of tree in BANG directory (the smallest cells).
1143 
1144  """
1145  leaf_blocks = self.__directory.get_leafs()
1146  unhandled_block_indexes = set([i for i in range(len(leaf_blocks)) if leaf_blocks[i].get_density() > self.__density_threshold])
1147 
1148  current_block = self.__find_block_center(leaf_blocks, unhandled_block_indexes)
1149  cluster_index = 0
1150 
1151  while current_block is not None:
1152  if current_block.get_density() <= self.__density_threshold or len(current_block) <= self.__amount_threshold:
1153  break
1154 
1155  self.__expand_cluster_block(current_block, cluster_index, leaf_blocks, unhandled_block_indexes)
1156 
1157  current_block = self.__find_block_center(leaf_blocks, unhandled_block_indexes)
1158  cluster_index += 1
1159 
1160  self.__store_clustering_results(cluster_index, leaf_blocks)
1161 
1162 
1163  def __expand_cluster_block(self, block, cluster_index, leaf_blocks, unhandled_block_indexes):
1164  """!
1165  @brief Expand cluster from specific block that is considered as a central block.
1166 
1167  @param[in] block (bang_block): Block that is considered as a central block for cluster.
1168  @param[in] cluster_index (uint): Index of cluster that is assigned to blocks that forms new cluster.
1169  @param[in] leaf_blocks (list): Leaf BANG-blocks that are considered during cluster formation.
1170  @param[in] unhandled_block_indexes (set): Set of candidates (BANG block indexes) to become a cluster member. The
1171  parameter helps to reduce traversing among BANG-block providing only restricted set of block that
1172  should be considered.
1173 
1174  """
1175 
1176  block.set_cluster(cluster_index)
1177  self.__update_cluster_dendrogram(cluster_index, [block])
1178 
1179  neighbors = self.__find_block_neighbors(block, leaf_blocks, unhandled_block_indexes)
1180  self.__update_cluster_dendrogram(cluster_index, neighbors)
1181 
1182  for neighbor in neighbors:
1183  neighbor.set_cluster(cluster_index)
1184  neighbor_neighbors = self.__find_block_neighbors(neighbor, leaf_blocks, unhandled_block_indexes)
1185  self.__update_cluster_dendrogram(cluster_index, neighbor_neighbors)
1186 
1187  neighbors += neighbor_neighbors
1188 
1189 
1190  def __store_clustering_results(self, amount_clusters, leaf_blocks):
1191  """!
1192  @brief Stores clustering results in a convenient way.
1193 
1194  @param[in] amount_clusters (uint): Amount of cluster that was allocated during processing.
1195  @param[in] leaf_blocks (list): Leaf BANG-blocks (the smallest cells).
1196 
1197  """
1198  self.__clusters = [[] for _ in range(amount_clusters)]
1199  for block in leaf_blocks:
1200  index = block.get_cluster()
1201 
1202  if index is not None:
1203  self.__clusters[index] += block.get_points()
1204  else:
1205  self.__noise += block.get_points()
1206 
1207  self.__clusters = [ list(set(cluster)) for cluster in self.__clusters ]
1208  self.__noise = list(set(self.__noise))
1209 
1210 
1211  def __find_block_center(self, level_blocks, unhandled_block_indexes):
1212  """!
1213  @brief Search block that is cluster center for new cluster.
1214 
1215  @return (bang_block) Central block for new cluster, if cluster is not found then None value is returned.
1216 
1217  """
1218  for i in reversed(range(len(level_blocks))):
1219  if level_blocks[i].get_density() <= self.__density_threshold:
1220  return None
1221 
1222  if level_blocks[i].get_cluster() is None:
1223  unhandled_block_indexes.remove(i)
1224  return level_blocks[i]
1225 
1226  return None
1227 
1228 
1229  def __find_block_neighbors(self, block, level_blocks, unhandled_block_indexes):
1230  """!
1231  @brief Search block neighbors that are parts of new clusters (density is greater than threshold and that are
1232  not cluster members yet), other neighbors are ignored.
1233 
1234  @param[in] block (bang_block): BANG-block for which neighbors should be found (which can be part of cluster).
1235  @param[in] level_blocks (list): BANG-blocks on specific level.
1236  @param[in] unhandled_block_indexes (set): Blocks that have not been processed yet.
1237 
1238  @return (list) Block neighbors that can become part of cluster.
1239 
1240  """
1241  neighbors = []
1242 
1243  handled_block_indexes = []
1244  for unhandled_index in unhandled_block_indexes:
1245  if block.is_neighbor(level_blocks[unhandled_index]):
1246  handled_block_indexes.append(unhandled_index)
1247  neighbors.append(level_blocks[unhandled_index])
1248 
1249  # Maximum number of neighbors is eight
1250  if len(neighbors) == 8:
1251  break
1252 
1253  for handled_index in handled_block_indexes:
1254  unhandled_block_indexes.remove(handled_index)
1255 
1256  return neighbors
1257 
1258 
1259  def __update_cluster_dendrogram(self, index_cluster, blocks):
1260  """!
1261  @brief Append clustered blocks to dendrogram.
1262 
1263  @param[in] index_cluster (uint): Cluster index that was assigned to blocks.
1264  @param[in] blocks (list): Blocks that were clustered.
1265 
1266  """
1267  if len(self.__dendrogram) <= index_cluster:
1268  self.__dendrogram.append([])
1269 
1270  blocks = sorted(blocks, key=lambda block: block.get_density(), reverse=True)
1271  self.__dendrogram[index_cluster] += blocks
1272 
Common visualizer of clusters on 1D, 2D or 3D surface.
Definition: __init__.py:359
pyclustering module for cluster analysis.
Definition: __init__.py:1
def __update_cluster_dendrogram(self, index_cluster, blocks)
Append clustered blocks to dendrogram.
Definition: bang.py:1259
def get_clusters(self)
Returns allocated clusters.
Definition: bang.py:1055
def get_noise(self)
Returns allocated noise.
Definition: bang.py:1070
def set_cluster(self, index)
Assign cluster to the BANG-block by index.
Definition: bang.py:852
def __draw_clusters(self)
Display clusters and outliers using different colors.
Definition: bang.py:336
def __draw_block(self, block, block_alpha=0.0)
Display single BANG block on axis.
Definition: bang.py:300
def get_points(self)
Return points that covers by the BANG-block.
Definition: bang.py:839
def __calculate_volume(self)
Calculates volume of current spatial block.
Definition: bang.py:731
BANG-block that represent spatial region in data space.
Definition: bang.py:752
Utils that are used by modules of pyclustering.
Definition: __init__.py:1
Module for representing clustering results.
Definition: encoder.py:1
def is_neighbor(self, block)
Performs calculation to check whether specified block is neighbor to the current. ...
Definition: bang.py:862
def __expand_cluster_block(self, block, cluster_index, leaf_blocks, unhandled_block_indexes)
Expand cluster from specific block that is considered as a central block.
Definition: bang.py:1163
def get_height(self)
Returns height of BANG tree where blocks are stored.
Definition: bang.py:497
Colors used by pyclustering library for visualization.
Definition: color.py:1
def is_neighbor(self, block)
Performs calculation to identify whether specified block is neighbor of current block.
Definition: bang.py:688
Visualizer of BANG algorithm&#39;s results.
Definition: bang.py:48
def get_region(self)
Returns region number of BANG-block.
Definition: bang.py:796
def __init__(self, data, levels, kwargs)
Create BANG directory - basically tree structure with direct access to leafs.
Definition: bang.py:423
def __contains__(self, point)
Point is considered as contained if it lies in block (belong to it).
Definition: bang.py:632
def get_cluster_encoding(self)
Returns clustering result representation type that indicate how clusters are encoded.
Definition: bang.py:1111
def __get_amount_points(self)
Count covered points by the BANG-block and if cache is enable then covered points are stored...
Definition: bang.py:911
def __init__(self, data, region, level, space_block, cache_points=False)
Create BANG-block.
Definition: bang.py:757
def get_leafs(self)
Return leafs - the smallest blocks.
Definition: bang.py:473
Class implements BANG grid based clustering algorithm.
Definition: bang.py:955
def __create_directory(self)
Create BANG directory as a tree with separate storage for leafs.
Definition: bang.py:507
def __str__(self)
Returns string representation of BANG-block using region number and level where block is located...
Definition: bang.py:780
def __draw_cluster(self, data, cluster, color, marker)
Draw 2-D single cluster on axis using specified color and marker.
Definition: bang.py:349
def __find_block_center(self, level_blocks, unhandled_block_indexes)
Search block that is cluster center for new cluster.
Definition: bang.py:1211
def __allocate_clusters(self)
Performs cluster allocation using leafs of tree in BANG directory (the smallest cells).
Definition: bang.py:1140
def get_dendrogram(self)
Returns dendrogram of clusters.
Definition: bang.py:1099
Geometrical description of BANG block in data space.
Definition: bang.py:600
def __len__(self)
Returns block size defined by amount of points that are contained by this block.
Definition: bang.py:788
Provides service for creating 2-D animation using BANG clustering results.
Definition: bang.py:224
def get_corners(self)
Return spatial description of current block.
Definition: bang.py:646
def __split_block(self, block, split_dimension, cache_require, current_level_blocks)
Split specific block in specified dimension.
Definition: bang.py:578
def __increment_block(self)
Increment BANG block safely by updating block index, level and level block.
Definition: bang.py:286
def get_data(self)
Return data that is stored in the directory.
Definition: bang.py:463
def __str__(self)
Returns string block description.
Definition: bang.py:622
def get_directory(self)
Returns grid directory that describes grid of the processed data.
Definition: bang.py:1085
def show_blocks(directory)
Show BANG-blocks (leafs only) in data space.
Definition: bang.py:60
def __find_block_neighbors(self, block, level_blocks, unhandled_block_indexes)
Search block neighbors that are parts of new clusters (density is greater than threshold and that are...
Definition: bang.py:1229
def __calculate_density(self, amount_points)
Calculates BANG-block density.
Definition: bang.py:895
def __calculate_neighborhood(self, block_max_corner)
Calculates neighborhood score that defined whether blocks are neighbors.
Definition: bang.py:708
def __init__(self, data, levels, ccore=False, kwargs)
Create BANG clustering algorithm.
Definition: bang.py:1003
def __build_level(self, previous_level_blocks, level)
Build new level of directory.
Definition: bang.py:554
def __store_level_blocks(self, level_blocks)
Store level blocks if observing is enabled.
Definition: bang.py:526
def __init__(self, directory, clusters)
Creates BANG animator instance.
Definition: bang.py:254
def __cache_covered_data(self)
Cache covered data.
Definition: bang.py:927
def __validate_arguments(self)
Check correctness of input arguments and throw exception if incorrect is found.
Definition: bang.py:277
def get_volume(self)
Returns volume of current block.
Definition: bang.py:656
def split(self, dimension)
Split current block into two spatial blocks in specified dimension.
Definition: bang.py:668
def get_density(self)
Returns density of the BANG-block.
Definition: bang.py:808
def __build_directory_levels(self)
Build levels of direction if amount of level is greater than one.
Definition: bang.py:539
def animate(self, animation_velocity=75, movie_fps=25, movie_filename=None)
Animates clustering process that is performed by BANG algorithm.
Definition: bang.py:358
def show_clusters(data, clusters, noise=None)
Display K-Means clustering results.
Definition: bang.py:120
def __init__(self, max_corner, min_corner)
Creates spatial block in data space.
Definition: bang.py:609
def __len__(self)
Returns amount of blocks that is stored in the directory.
Definition: bang.py:453
def __draw_leaf_density(self)
Display densities by filling blocks by appropriate colors.
Definition: bang.py:321
def split(self, split_dimension, cache_points)
Split BANG-block into two new blocks in specified dimension.
Definition: bang.py:874
def show_dendrogram(dendrogram)
Display dendrogram of BANG-blocks.
Definition: bang.py:90
def process(self)
Performs clustering process in line with rules of BANG clustering algorithm.
Definition: bang.py:1039
def __validate_arguments(self)
Check input arguments of BANG algorithm and if one of them is not correct then appropriate exception ...
Definition: bang.py:1124
BANG directory stores BANG-blocks that represents grid in data space.
Definition: bang.py:416
def get_level(self, level)
Returns BANG blocks on the specific level.
Definition: bang.py:485
def __cache_point(self, index)
Store index points.
Definition: bang.py:940
def __store_clustering_results(self, amount_clusters, leaf_blocks)
Stores clustering results in a convenient way.
Definition: bang.py:1190
def get_spatial_block(self)
Return spatial block - BANG-block description in data space.
Definition: bang.py:829
def get_cluster(self)
Return index of cluster to which the BANG-block belongs to.
Definition: bang.py:818