bang.py
1 """!
2 
3 @brief Cluster analysis algorithm: BANG.
4 @details Implementation based on paper @cite inproceedings::bang::1.
5 
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2020
8 @copyright GNU Public License
9 
10 @cond GNU_PUBLIC_LICENSE
11  PyClustering is free software: you can redistribute it and/or modify
12  it under the terms of the GNU General Public License as published by
13  the Free Software Foundation, either version 3 of the License, or
14  (at your option) any later version.
15 
16  PyClustering is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  GNU General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23 @endcond
24 
25 """
26 
27 import itertools
28 import warnings
29 
30 try:
31  import matplotlib
32  import matplotlib.gridspec as gridspec
33  import matplotlib.pyplot as plt
34  import matplotlib.patches as patches
35  import matplotlib.animation as animation
36 except Exception as error_instance:
37  warnings.warn("Impossible to import matplotlib (please, install 'matplotlib'), pyclustering's visualization "
38  "functionality is not available (details: '%s')." % str(error_instance))
39 
40 from pyclustering.cluster import cluster_visualizer
41 from pyclustering.cluster.encoder import type_encoding
42 
43 from pyclustering.utils import data_corners
44 from pyclustering.utils.color import color as color_list
45 
46 
47 
49  """!
50  @brief Visualizer of BANG algorithm's results.
51  @details BANG visualizer provides visualization services that are specific for BANG algorithm.
52 
53  """
54 
55  __maximum_density_alpha = 0.6
56 
57 
58  @staticmethod
59  def show_blocks(directory):
60  """!
61  @brief Show BANG-blocks (leafs only) in data space.
62  @details BANG-blocks represents grid that was used for clustering process.
63 
64  @param[in] directory (bang_directory): Directory that was created by BANG algorithm during clustering process.
65 
66  """
67 
68  dimension = len(directory.get_data()[0])
69 
70  amount_canvases = 1
71  if dimension > 1:
72  amount_canvases = int(dimension * (dimension - 1) / 2)
73 
74  figure = plt.figure()
75  grid_spec = gridspec.GridSpec(1, amount_canvases)
76 
77  pairs = list(itertools.combinations(range(dimension), 2))
78  if len(pairs) == 0: pairs = [(0, 0)]
79 
80  for index in range(amount_canvases):
81  ax = figure.add_subplot(grid_spec[index])
82  bang_visualizer.__draw_blocks(ax, directory.get_leafs(), pairs[index])
83  bang_visualizer.__draw_two_dimension_data(ax, directory.get_data(), pairs[index])
84 
85  plt.show()
86 
87 
88  @staticmethod
89  def show_dendrogram(dendrogram):
90  """!
91  @brief Display dendrogram of BANG-blocks.
92 
93  @param[in] dendrogram (list): List representation of dendrogram of BANG-blocks.
94 
95  @see bang.get_dendrogram()
96 
97  """
98  plt.figure()
99  axis = plt.subplot(1, 1, 1)
100 
101  current_position = 0
102  for index_cluster in range(len(dendrogram)):
103  densities = [ block.get_density() for block in dendrogram[index_cluster] ]
104  xrange = range(current_position, current_position + len(densities))
105 
106  axis.bar(xrange, densities, 1.0, linewidth=0.0, color=color_list.get_color(index_cluster))
107 
108  current_position += len(densities)
109 
110  axis.set_ylabel("density")
111  axis.set_xlabel("block")
112  axis.xaxis.set_ticklabels([])
113 
114  plt.xlim([-0.5, current_position - 0.5])
115  plt.show()
116 
117 
118  @staticmethod
119  def show_clusters(data, clusters, noise=None):
120  """!
121  @brief Display BANG clustering results.
122 
123  @param[in] data (list): Dataset that was used for clustering.
124  @param[in] clusters (array_like): Clusters that were allocated by the algorithm.
125  @param[in] noise (array_like): Noise that were allocated by the algorithm.
126 
127  """
128  visualizer = cluster_visualizer()
129  visualizer.append_clusters(clusters, data)
130  visualizer.append_cluster(noise or [], data, marker='x')
131  visualizer.show()
132 
133 
134  @staticmethod
135  def __draw_two_dimension_data(ax, data, pair):
136  """!
137  @brief Display data in two-dimensional canvas.
138 
139  @param[in] ax (Axis): Canvas where data should be displayed.
140  @param[in] data (list): Data points that should be displayed.
141  @param[in] pair (tuple): Pair of dimension indexes.
142 
143  """
144  ax.set_xlabel("x%d" % pair[0])
145  ax.set_ylabel("x%d" % pair[1])
146 
147  for point in data:
148  if len(data[0]) > 1:
149  ax.plot(point[pair[0]], point[pair[1]], color='red', marker='.')
150  else:
151  ax.plot(point[pair[0]], 0, color='red', marker='.')
152  ax.yaxis.set_ticklabels([])
153 
154 
155  @staticmethod
156  def __draw_blocks(ax, blocks, pair):
157  """!
158  @brief Display BANG-blocks on specified figure.
159 
160  @param[in] ax (Axis): Axis where bang-blocks should be displayed.
161  @param[in] blocks (list): List of blocks that should be displyed.
162  @param[in] pair (tuple): Pair of coordinate index that should be displayed.
163 
164  """
165  ax.grid(False)
166 
167  density_scale = blocks[-1].get_density()
168  for block in blocks:
169  bang_visualizer.__draw_block(ax, pair, block, density_scale)
170 
171 
172  @staticmethod
173  def __draw_block(ax, pair, block, density_scale):
174  """!
175  @brief Display BANG-block on the specified ax.
176 
177  @param[in] ax (Axis): Axis where block should be displayed.
178  @param[in] pair (tuple): Pair of coordinate index that should be displayed.
179  @param[in] block (bang_block): BANG-block that should be displayed.
180  @param[in] density_scale (double): Max density to display density of the block by appropriate tone.
181 
182  """
183  max_corner, min_corner = bang_visualizer.__get_rectangle_description(block, pair)
184 
185  belong_cluster = block.get_cluster() is not None
186 
187  if density_scale != 0.0:
188  density_scale = bang_visualizer.__maximum_density_alpha * block.get_density() / density_scale
189 
190  face_color = matplotlib.colors.to_rgba('blue', alpha=density_scale)
191  edge_color = matplotlib.colors.to_rgba('black', alpha=1.0)
192 
193  rect = patches.Rectangle(min_corner, max_corner[0] - min_corner[0], max_corner[1] - min_corner[1],
194  fill=belong_cluster,
195  facecolor=face_color,
196  edgecolor=edge_color,
197  linewidth=0.5)
198  ax.add_patch(rect)
199 
200 
201  @staticmethod
202  def __get_rectangle_description(block, pair):
203  """!
204  @brief Create rectangle description for block in specific dimension.
205 
206  @param[in] pair (tuple): Pair of coordinate index that should be displayed.
207  @param[in] block (bang_block): BANG-block that should be displayed
208 
209  @return (tuple) Pair of corners that describes rectangle.
210 
211  """
212  max_corner, min_corner = block.get_spatial_block().get_corners()
213 
214  max_corner = [max_corner[pair[0]], max_corner[pair[1]]]
215  min_corner = [min_corner[pair[0]], min_corner[pair[1]]]
216 
217  if pair == (0, 0):
218  max_corner[1], min_corner[1] = 1.0, -1.0
219 
220  return max_corner, min_corner
221 
222 
224  """!
225  @brief Provides service for creating 2-D animation using BANG clustering results.
226  @details The animator does not support visualization of clustering process where non 2-dimensional was used.
227 
228  Code example of animation of BANG clustering process:
229  @code
230  from pyclustering.cluster.bang import bang, bang_animator
231  from pyclustering.utils import read_sample
232  from pyclustering.samples.definitions import FCPS_SAMPLES
233 
234  # Read data two dimensional data.
235  data = read_sample(FCPS_SAMPLES.SAMPLE_LSUN)
236 
237  # Create instance of BANG algorithm.
238  bang_instance = bang(data, 9)
239  bang_instance.process()
240 
241  # Obtain clustering results.
242  clusters = bang_instance.get_clusters()
243  noise = bang_instance.get_noise()
244  directory = bang_instance.get_directory()
245 
246  # Create BANG animation using class 'bang_animator':
247  animator = bang_animator(directory, clusters)
248  animator.animate()
249  @endcode
250 
251 
252  """
253  def __init__(self, directory, clusters):
254  """!
255  @brief Creates BANG animator instance.
256 
257  @param[in] directory (bang_directory): BANG directory that was formed during BANG clustering process.
258  @param[in] clusters (list): Allocated clusters during BANG clustering process.
259 
260  """
261  self.__directory = directory
262  self.__clusters = clusters
263  self.__noise = []
264 
265  self.__current_block = 0
266  self.__current_level = 0
267  self.__level_blocks = directory.get_level(0)
268 
269  self.__figure = plt.figure()
270  self.__ax = self.__figure.add_subplot(1, 1, 1)
271  self.__special_frame = 0
272 
273  self.__validate_arguments()
274 
275 
276  def __validate_arguments(self):
277  """!
278  @brief Check correctness of input arguments and throw exception if incorrect is found.
279 
280  """
281  if len(self.__directory.get_data()[0]) != 2:
282  raise ValueError("Impossible to animate BANG clustering process for non 2D data.")
283 
284 
285  def __increment_block(self):
286  """!
287  @brief Increment BANG block safely by updating block index, level and level block.
288 
289  """
290  self.__current_block += 1
291  if self.__current_block >= len(self.__level_blocks):
292  self.__current_block = 0
293  self.__current_level += 1
294 
295  if self.__current_level < self.__directory.get_height():
296  self.__level_blocks = self.__directory.get_level(self.__current_level)
297 
298 
299  def __draw_block(self, block, block_alpha=0.0):
300  """!
301  @brief Display single BANG block on axis.
302 
303  @param[in] block (bang_block): BANG block that should be displayed.
304  @param[in] block_alpha (double): Transparency level - value of alpha.
305 
306  """
307  max_corner, min_corner = block.get_spatial_block().get_corners()
308 
309  face_color = matplotlib.colors.to_rgba('blue', alpha=block_alpha)
310  edge_color = matplotlib.colors.to_rgba('black', alpha=1.0)
311 
312  rect = patches.Rectangle(min_corner, max_corner[0] - min_corner[0], max_corner[1] - min_corner[1],
313  fill=True,
314  facecolor=face_color,
315  edgecolor=edge_color,
316  linewidth=0.5)
317  self.__ax.add_patch(rect)
318 
319 
320  def __draw_leaf_density(self):
321  """!
322  @brief Display densities by filling blocks by appropriate colors.
323 
324  """
325  leafs = self.__directory.get_leafs()
326  density_scale = leafs[-1].get_density()
327 
328  if density_scale == 0.0: density_scale = 1.0
329 
330  for block in leafs:
331  alpha = 0.8 * block.get_density() / density_scale
332  self.__draw_block(block, alpha)
333 
334 
335  def __draw_clusters(self):
336  """!
337  @brief Display clusters and outliers using different colors.
338 
339  """
340  data = self.__directory.get_data()
341  for index_cluster in range(len(self.__clusters)):
342  color = color_list.get_color(index_cluster)
343  self.__draw_cluster(data, self.__clusters[index_cluster], color, '.')
344 
345  self.__draw_cluster(self.__directory.get_data(), self.__noise, 'gray', 'x')
346 
347 
348  def __draw_cluster(self, data, cluster, color, marker):
349  """!
350  @brief Draw 2-D single cluster on axis using specified color and marker.
351 
352  """
353  for item in cluster:
354  self.__ax.plot(data[item][0], data[item][1], color=color, marker=marker)
355 
356 
357  def animate(self, animation_velocity=75, movie_fps=25, movie_filename=None):
358  """!
359  @brief Animates clustering process that is performed by BANG algorithm.
360 
361  @param[in] animation_velocity (uint): Interval between frames in milliseconds (for run-time animation only).
362  @param[in] movie_fps (uint): Defines frames per second (for rendering movie only).
363  @param[in] movie_filename (string): If it is specified then animation will be stored to file that is specified in this parameter.
364 
365  """
366  def init_frame():
367  self.__figure.clf()
368  self.__ax = self.__figure.add_subplot(1, 1, 1)
369  self.__figure.suptitle("BANG algorithm", fontsize=18, fontweight='bold')
370 
371  for point in self.__directory.get_data():
372  self.__ax.plot(point[0], point[1], color='red', marker='.')
373 
374  return frame_generation(0)
375 
376 
377  def frame_generation(index_iteration):
378  if self.__current_level < self.__directory.get_height():
379  block = self.__level_blocks[self.__current_block]
380  self.__draw_block(block)
381  self.__increment_block()
382 
383  else:
384  if self.__special_frame == 0:
385  self.__draw_leaf_density()
386 
387  elif self.__special_frame == 15:
388  self.__draw_clusters()
389 
390  elif self.__special_frame == 30:
391  self.__figure.clf()
392  self.__ax = self.__figure.add_subplot(1, 1, 1)
393  self.__figure.suptitle("BANG algorithm", fontsize=18, fontweight='bold')
394 
395  self.__draw_clusters()
396 
397  self.__special_frame += 1
398 
399 
400 
401  iterations = len(self.__directory) + 60
402  # print("Total number of iterations: %d" % iterations)
403  cluster_animation = animation.FuncAnimation(self.__figure, frame_generation, iterations,
404  interval=animation_velocity,
405  init_func=init_frame,
406  repeat_delay=5000)
407 
408  if movie_filename is not None:
409  cluster_animation.save(movie_filename, writer = 'ffmpeg', fps = movie_fps, bitrate = 3500)
410  else:
411  plt.show()
412 
413 
414 
416  """!
417  @brief BANG directory stores BANG-blocks that represents grid in data space.
418  @details The directory build BANG-blocks in binary tree manner. Leafs of the tree stored separately to provide
419  a direct access to the leafs that should be analysed. Leafs cache data-points.
420 
421  """
422  def __init__(self, data, levels, **kwargs):
423  """!
424  @brief Create BANG directory - basically tree structure with direct access to leafs.
425 
426  @param[in] data (list): Input data that is clustered.
427  @param[in] levels (uint): Height of the tree of blocks.
428  @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'observe').
429 
430  <b>Keyword Args:</b><br>
431  - observe (bool): If 'True' then blocks on each level are stored.
432  - density_threshold (double): The lowest level of density when contained data in bang-block is
433  considered as a noise and there is no need to split it till the last level. Be aware that this
434  parameter is used with 'amount_threshold' parameter.
435  - amount_threshold (uint): Amount of points in the block when it contained data in bang-block is
436  considered as a noise and there is no need to split it till the last level.
437 
438  """
439  self.__data = data
440  self.__levels = levels
441  self.__density_threshold = kwargs.get('density_threshold', 0.0)
442  self.__amount_density = kwargs.get('amount_threshold', 0)
443  self.__leafs = []
444  self.__root = None
445  self.__level_blocks = []
446  self.__size = 0
447  self.__observe = kwargs.get('observe', True)
448 
449  self.__create_directory()
450 
451 
452  def __len__(self):
453  """!
454  @brief Returns amount of blocks that is stored in the directory
455 
456  @return (uint) Amount of blocks in the BANG directory.
457 
458  """
459  return self.__size
460 
461 
462  def get_data(self):
463  """!
464  @brief Return data that is stored in the directory.
465 
466  @return (list) List of points that represents stored data.
467 
468  """
469  return self.__data
470 
471 
472  def get_leafs(self):
473  """!
474  @brief Return leafs - the smallest blocks.
475  @details Some leafs can be bigger than others because splitting is not performed for blocks whose density is
476  less than threshold.
477 
478  @return (list) List of blocks that are leafs of BANG directory.
479 
480  """
481  return self.__leafs
482 
483 
484  def get_level(self, level):
485  """!
486  @brief Returns BANG blocks on the specific level.
487 
488  @param[in] level (uint): Level of tree where BANG blocks are located.
489 
490  @return (list) List of BANG blocks on the specific level.
491 
492  """
493  return self.__level_blocks[level]
494 
495 
496  def get_height(self):
497  """!
498  @brief Returns height of BANG tree where blocks are stored.
499 
500  @return (uint) Height of BANG tree.
501 
502  """
503  return len(self.__level_blocks)
504 
505 
506  def __create_directory(self):
507  """!
508  @brief Create BANG directory as a tree with separate storage for leafs.
509 
510  """
511 
512  min_corner, max_corner = data_corners(self.__data)
513  data_block = spatial_block(max_corner, min_corner)
514 
515  cache_require = (self.__levels == 1)
516  self.__root = bang_block(self.__data, 0, 0, data_block, cache_require)
517 
518  if cache_require:
519  self.__leafs.append(self.__root)
520  self.__store_level_blocks([self.__root])
521  else:
523 
524 
525  def __store_level_blocks(self, level_blocks):
526  """!
527  @brief Store level blocks if observing is enabled.
528 
529  @param[in] level_blocks (list): Created blocks on a new level.
530 
531  """
532  self.__size += len(level_blocks)
533  if self.__observe is True:
534  self.__level_blocks.append(level_blocks)
535 
536 
537 
538  def __build_directory_levels(self):
539  """!
540  @brief Build levels of direction if amount of level is greater than one.
541 
542  """
543 
544  previous_level_blocks = [ self.__root ]
545 
546  for level in range(1, self.__levels):
547  previous_level_blocks = self.__build_level(previous_level_blocks, level)
548  self.__store_level_blocks(previous_level_blocks)
549 
550  self.__leafs = sorted(self.__leafs, key=lambda block: block.get_density())
551 
552 
553  def __build_level(self, previous_level_blocks, level):
554  """!
555  @brief Build new level of directory.
556 
557  @param[in] previous_level_blocks (list): BANG-blocks on the previous level.
558  @param[in] level (uint): Level number that should be built.
559 
560  @return (list) New block on the specified level.
561 
562  """
563  current_level_blocks = []
564 
565  split_dimension = level % len(self.__data[0])
566  cache_require = (level == self.__levels - 1)
567 
568  for block in previous_level_blocks:
569  self.__split_block(block, split_dimension, cache_require, current_level_blocks)
570 
571  if cache_require:
572  self.__leafs += current_level_blocks
573 
574  return current_level_blocks
575 
576 
577  def __split_block(self, block, split_dimension, cache_require, current_level_blocks):
578  """!
579  @brief Split specific block in specified dimension.
580  @details Split is not performed for block whose density is lower than threshold value, such blocks are putted to
581  leafs.
582 
583  @param[in] block (bang_block): BANG-block that should be split.
584  @param[in] split_dimension (uint): Dimension at which splitting should be performed.
585  @param[in] cache_require (bool): Defines when points in cache should be stored during density calculation.
586  @param[in|out] current_level_blocks (list): Block storage at the current level where new blocks should be added.
587 
588  """
589  if block.get_density() <= self.__density_threshold or len(block) <= self.__amount_density:
590  self.__leafs.append(block)
591 
592  else:
593  left, right = block.split(split_dimension, cache_require)
594  current_level_blocks.append(left)
595  current_level_blocks.append(right)
596 
597 
599  """!
600  @brief Geometrical description of BANG block in data space.
601  @details Provides services related to spatial functionality and used by bang_block
602 
603  @see bang_block
604 
605  """
606 
607  def __init__(self, max_corner, min_corner):
608  """!
609  @brief Creates spatial block in data space.
610 
611  @param[in] max_corner (array_like): Maximum corner coordinates of the block.
612  @param[in] min_corner (array_like): Minimal corner coordinates of the block.
613 
614  """
615  self.__max_corner = max_corner
616  self.__min_corner = min_corner
617  self.__volume = self.__calculate_volume()
618 
619 
620  def __str__(self):
621  """!
622  @brief Returns string block description.
623 
624  @return String representation of the block.
625 
626  """
627  return "(max: %s; min: %s)" % (self.__max_corner, self.__min_corner)
628 
629 
630  def __contains__(self, point):
631  """!
632  @brief Point is considered as contained if it lies in block (belong to it).
633 
634  @return (bool) True if point is in block, otherwise False.
635 
636  """
637  for i in range(len(point)):
638  if point[i] < self.__min_corner[i] or point[i] > self.__max_corner[i]:
639  return False
640 
641  return True
642 
643 
644  def get_corners(self):
645  """!
646  @brief Return spatial description of current block.
647 
648  @return (tuple) Pair of maximum and minimum corners (max_corner, min_corner).
649 
650  """
651  return self.__max_corner, self.__min_corner
652 
653 
654  def get_volume(self):
655  """!
656  @brief Returns volume of current block.
657  @details Volume block has uncommon mining here: for 1D is length of a line, for 2D is square of rectangle,
658  for 3D is volume of 3D figure, and for ND is volume of ND figure.
659 
660  @return (double) Volume of current block.
661 
662  """
663  return self.__volume
664 
665 
666  def split(self, dimension):
667  """!
668  @brief Split current block into two spatial blocks in specified dimension.
669 
670  @param[in] dimension (uint): Dimension where current block should be split.
671 
672  @return (tuple) Pair of new split blocks from current block.
673 
674  """
675  first_max_corner = self.__max_corner[:]
676  second_min_corner = self.__min_corner[:]
677 
678  split_border = (self.__max_corner[dimension] + self.__min_corner[dimension]) / 2.0
679 
680  first_max_corner[dimension] = split_border
681  second_min_corner[dimension] = split_border
682 
683  return spatial_block(first_max_corner, self.__min_corner), spatial_block(self.__max_corner, second_min_corner)
684 
685 
686  def is_neighbor(self, block):
687  """!
688  @brief Performs calculation to identify whether specified block is neighbor of current block.
689  @details It also considers diagonal blocks as neighbors.
690 
691  @param[in] block (spatial_block): Another block that is check whether it is neighbor.
692 
693  @return (bool) True is blocks are neighbors, False otherwise.
694 
695  """
696  if block is not self:
697  block_max_corner, _ = block.get_corners()
698  dimension = len(block_max_corner)
699  neighborhood_score = self.__calculate_neighborhood(block_max_corner)
700 
701  if neighborhood_score == dimension:
702  return True
703 
704  return False
705 
706 
707  def __calculate_neighborhood(self, block_max_corner):
708  """!
709  @brief Calculates neighborhood score that defined whether blocks are neighbors.
710 
711  @param[in] block_max_corner (list): Maximum coordinates of other block.
712 
713  @return (uint) Neighborhood score.
714 
715  """
716  dimension = len(block_max_corner)
717 
718  length_edges = [self.__max_corner[i] - self.__min_corner[i] for i in range(dimension)]
719 
720  neighborhood_score = 0
721  for i in range(dimension):
722  diff = abs(block_max_corner[i] - self.__max_corner[i])
723 
724  if diff <= length_edges[i] + length_edges[i] * 0.0001:
725  neighborhood_score += 1
726 
727  return neighborhood_score
728 
729 
730  def __calculate_volume(self):
731  """!
732  @brief Calculates volume of current spatial block.
733  @details If empty dimension is detected (where all points has the same value) then such dimension is ignored
734  during calculation of volume.
735 
736  @return (double) Volume of current spatial block.
737 
738  """
739 
740  volume = 0.0
741  for i in range(0, len(self.__max_corner)):
742  side_length = self.__max_corner[i] - self.__min_corner[i]
743 
744  if side_length != 0.0:
745  if volume == 0.0: volume = side_length
746  else: volume *= side_length
747 
748  return volume
749 
750 
752  """!
753  @brief BANG-block that represent spatial region in data space.
754 
755  """
756  def __init__(self, data, region, level, space_block, cache_points=False):
757  """!
758  @brief Create BANG-block.
759 
760  @param[in] data (list): List of points that are processed.
761  @param[in] region (uint): Region number - unique value on a level.
762  @param[in] level (uint): Level number where block is created.
763  @param[in] space_block (spatial_block): Spatial block description in data space.
764  @param[in] cache_points (bool): if True then points are stored in memory (used for leaf blocks).
765 
766  """
767  self.__data = data
768  self.__region_number = region
769  self.__level = level
770  self.__spatial_block = space_block
771  self.__cache_points = cache_points
772 
773  self.__cluster = None
774  self.__points = None
777 
778 
779  def __str__(self):
780  """!
781  @brief Returns string representation of BANG-block using region number and level where block is located.
782 
783  """
784  return "(" + str(self.__region_number) + ", " + str(self.__level) + ")"
785 
786 
787  def __len__(self):
788  """!
789  @brief Returns block size defined by amount of points that are contained by this block.
790 
791  """
792  return self.__amount_points
793 
794 
795  def get_region(self):
796  """!
797  @brief Returns region number of BANG-block.
798  @details Region number is unique on among region numbers on a directory level. Pair of region number and level
799  is unique for all directory.
800 
801  @return (uint) Region number.
802 
803  """
804  return self.__region_number
805 
806 
807  def get_density(self):
808  """!
809  @brief Returns density of the BANG-block.
810 
811  @return (double) BANG-block density.
812 
813  """
814  return self.__density
815 
816 
817  def get_cluster(self):
818  """!
819  @brief Return index of cluster to which the BANG-block belongs to.
820  @details Index of cluster may have None value if the block was not assigned to any cluster.
821 
822  @return (uint) Index of cluster or None if the block does not belong to any cluster.
823 
824  """
825  return self.__cluster
826 
827 
828  def get_spatial_block(self):
829  """!
830  @brief Return spatial block - BANG-block description in data space.
831 
832  @return (spatial_block) Spatial block of the BANG-block.
833 
834  """
835  return self.__spatial_block
836 
837 
838  def get_points(self):
839  """!
840  @brief Return points that covers by the BANG-block.
841 
842  @return (list) List of point indexes that are covered by the block.
843 
844  """
845  if self.__points is None:
846  self.__cache_covered_data()
847 
848  return self.__points
849 
850 
851  def set_cluster(self, index):
852  """!
853  @brief Assign cluster to the BANG-block by index.
854 
855  @param[in] index (uint): Index cluster that is assigned to BANG-block.
856 
857  """
858  self.__cluster = index
859 
860 
861  def is_neighbor(self, block):
862  """!
863  @brief Performs calculation to check whether specified block is neighbor to the current.
864 
865  @param[in] block (bang_block): Other BANG-block that should be checked for neighborhood.
866 
867  @return (bool) True if blocks are neighbors, False if blocks are not neighbors.
868 
869  """
870  return self.get_spatial_block().is_neighbor(block.get_spatial_block())
871 
872 
873  def split(self, split_dimension, cache_points):
874  """!
875  @brief Split BANG-block into two new blocks in specified dimension.
876 
877  @param[in] split_dimension (uint): Dimension where block should be split.
878  @param[in] cache_points (bool): If True then covered points are cached. Used for leaf blocks.
879 
880  @return (tuple) Pair of BANG-block that were formed from the current.
881 
882  """
883  left_region_number = self.__region_number
884  right_region_number = self.__region_number + 2 ** self.__level
885 
886  first_spatial_block, second_spatial_block = self.__spatial_block.split(split_dimension)
887 
888  left = bang_block(self.__data, left_region_number, self.__level + 1, first_spatial_block, cache_points)
889  right = bang_block(self.__data, right_region_number, self.__level + 1, second_spatial_block, cache_points)
890 
891  return left, right
892 
893 
894  def __calculate_density(self, amount_points):
895  """!
896  @brief Calculates BANG-block density.
897 
898  @param[in] amount_points (uint): Amount of points in block.
899 
900  @return (double) BANG-block density.
901 
902  """
903  volume = self.__spatial_block.get_volume()
904  if volume != 0.0:
905  return amount_points / volume
906 
907  return 0.0
908 
909 
910  def __get_amount_points(self):
911  """!
912  @brief Count covered points by the BANG-block and if cache is enable then covered points are stored.
913 
914  @return (uint) Amount of covered points.
915 
916  """
917  amount = 0
918  for index in range(len(self.__data)):
919  if self.__data[index] in self.__spatial_block:
920  self.__cache_point(index)
921  amount += 1
922 
923  return amount
924 
925 
926  def __cache_covered_data(self):
927  """!
928  @brief Cache covered data.
929 
930  """
931  self.__cache_points = True
932  self.__points = []
933 
934  for index_point in range(len(self.__data)):
935  if self.__data[index_point] in self.__spatial_block:
936  self.__cache_point(index_point)
937 
938 
939  def __cache_point(self, index):
940  """!
941  @brief Store index points.
942 
943  @param[in] index (uint): Index point that should be stored.
944 
945  """
946  if self.__cache_points:
947  if self.__points is None:
948  self.__points = []
949 
950  self.__points.append(index)
951 
952 
953 
954 class bang:
955  """!
956  @brief Class implements BANG grid based clustering algorithm.
957  @details BANG clustering algorithms uses a multidimensional grid structure to organize the value space surrounding
958  the pattern values. The patterns are grouped into blocks and clustered with respect to the blocks by
959  a topological neighbor search algorithm @cite inproceedings::bang::1.
960 
961  Code example of BANG usage:
962  @code
963  from pyclustering.cluster.bang import bang, bang_visualizer
964  from pyclustering.utils import read_sample
965  from pyclustering.samples.definitions import FCPS_SAMPLES
966 
967  # Read data three dimensional data.
968  data = read_sample(FCPS_SAMPLES.SAMPLE_CHAINLINK)
969 
970  # Prepare algorithm's parameters.
971  levels = 11
972 
973  # Create instance of BANG algorithm.
974  bang_instance = bang(data, levels)
975  bang_instance.process()
976 
977  # Obtain clustering results.
978  clusters = bang_instance.get_clusters()
979  noise = bang_instance.get_noise()
980  directory = bang_instance.get_directory()
981  dendrogram = bang_instance.get_dendrogram()
982 
983  # Visualize BANG clustering results.
984  bang_visualizer.show_blocks(directory)
985  bang_visualizer.show_dendrogram(dendrogram)
986  bang_visualizer.show_clusters(data, clusters, noise)
987  @endcode
988 
989  There is visualization of BANG-clustering of three-dimensional data 'chainlink'. BANG-blocks that were formed during
990  processing are shown on following figure. The darkest color means highest density, blocks that does not cover points
991  are transparent:
992  @image html bang_blocks_chainlink.png "Fig. 1. BANG-blocks that cover input data."
993 
994  Here is obtained dendrogram that can be used for further analysis to improve clustering results:
995  @image html bang_dendrogram_chainlink.png "Fig. 2. BANG dendrogram where the X-axis contains BANG-blocks, the Y-axis contains density."
996 
997  BANG clustering result of 'chainlink' data:
998  @image html bang_clustering_chainlink.png "Fig. 3. BANG clustering result. Data: 'chainlink'."
999 
1000  """
1001 
1002  def __init__(self, data, levels, ccore=False, **kwargs):
1003  """!
1004  @brief Create BANG clustering algorithm.
1005 
1006  @param[in] data (list): Input data (list of points) that should be clustered.
1007  @param[in] levels (uint): Amount of levels in tree that is used for splitting (how many times block should be
1008  split). For example, if amount of levels is two then surface will be divided into two blocks and
1009  each obtained block will be divided into blocks also.
1010  @param[in] ccore (bool): Reserved positional argument - not used yet.
1011  @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'observe').
1012 
1013  <b>Keyword Args:</b><br>
1014  - density_threshold (double): If block density is smaller than this value then contained data by this
1015  block is considered as a noise and its points as outliers. Block density is defined by amount of
1016  points in block divided by block volume: <i>amount_block_points</i>/<i>block_volume</i>. By default
1017  it is 0.0 - means than only empty blocks are considered as noise. Be aware that this parameter is used
1018  with parameter 'amount_threshold' - the maximum threshold is considered during processing.
1019  - amount_threshold (uint): Amount of points in the block when it contained data in bang-block is
1020  considered as a noise and there is no need to split it till the last level. Be aware that this parameter
1021  is used with parameter 'density_threshold' - the maximum threshold is considered during processing.
1022 
1023  """
1024  self.__data = data
1025  self.__levels = levels
1026  self.__directory = None
1027  self.__clusters = []
1028  self.__noise = []
1029  self.__cluster_blocks = []
1030  self.__dendrogram = []
1031  self.__density_threshold = kwargs.get('density_threshold', 0.0)
1032  self.__amount_threshold = kwargs.get('amount_threshold', 0)
1033  self.__ccore = ccore
1034 
1035  self.__validate_arguments()
1036 
1037 
1038  def process(self):
1039  """!
1040  @brief Performs clustering process in line with rules of BANG clustering algorithm.
1041 
1042  @return (bang) Returns itself (BANG instance).
1043 
1044  @see get_clusters()
1045  @see get_noise()
1046  @see get_directory()
1047  @see get_dendrogram()
1048 
1049  """
1050  self.__directory = bang_directory(self.__data, self.__levels,
1051  density_threshold=self.__density_threshold,
1052  amount_threshold=self.__amount_threshold)
1053  self.__allocate_clusters()
1054 
1055  return self
1056 
1057 
1058  def get_clusters(self):
1059  """!
1060  @brief Returns allocated clusters.
1061 
1062  @remark Allocated clusters are returned only after data processing (method process()). Otherwise empty list is returned.
1063 
1064  @return (list) List of allocated clusters, each cluster contains indexes of objects in list of data.
1065 
1066  @see process()
1067  @see get_noise()
1068 
1069  """
1070  return self.__clusters
1071 
1072 
1073  def get_noise(self):
1074  """!
1075  @brief Returns allocated noise.
1076 
1077  @remark Allocated noise is returned only after data processing (method process()). Otherwise empty list is returned.
1078 
1079  @return (list) List of indexes that are marked as a noise.
1080 
1081  @see process()
1082  @see get_clusters()
1083 
1084  """
1085  return self.__noise
1086 
1087 
1088  def get_directory(self):
1089  """!
1090  @brief Returns grid directory that describes grid of the processed data.
1091 
1092  @remark Grid directory is returned only after data processing (method process()). Otherwise None value is returned.
1093 
1094  @return (bang_directory) BANG directory that describes grid of process data.
1095 
1096  @see process()
1097 
1098  """
1099  return self.__directory
1100 
1101 
1102  def get_dendrogram(self):
1103  """!
1104  @brief Returns dendrogram of clusters.
1105  @details Dendrogram is created in following way: the density indices of all regions are calculated and sorted
1106  in decreasing order for each cluster during clustering process.
1107 
1108  @remark Dendrogram is returned only after data processing (method process()). Otherwise empty list is returned.
1109 
1110  """
1111  return self.__dendrogram
1112 
1113 
1115  """!
1116  @brief Returns clustering result representation type that indicate how clusters are encoded.
1117 
1118  @return (type_encoding) Clustering result representation.
1119 
1120  @see get_clusters()
1121 
1122  """
1123 
1124  return type_encoding.CLUSTER_INDEX_LIST_SEPARATION
1125 
1126 
1127  def __validate_arguments(self):
1128  """!
1129  @brief Check input arguments of BANG algorithm and if one of them is not correct then appropriate exception
1130  is thrown.
1131 
1132  """
1133  if len(self.__data) == 0:
1134  raise ValueError("Input data is empty (size: '%d')." % len(self.__data))
1135 
1136  if self.__levels < 1:
1137  raise ValueError("Height of the tree should be greater than 0 (current value: '%d')." % self.__levels)
1138 
1139  if self.__density_threshold < 0.0:
1140  raise ValueError("Density threshold should be greater or equal to 0 (current value: '%d')." %
1141  self.__density_threshold)
1142 
1143  if self.__amount_threshold < 0:
1144  raise ValueError("Amount of points threshold should be greater than 0 (current value: '%d')" %
1145  self.__amount_threshold)
1146 
1147 
1148  def __allocate_clusters(self):
1149  """!
1150  @brief Performs cluster allocation using leafs of tree in BANG directory (the smallest cells).
1151 
1152  """
1153  leaf_blocks = self.__directory.get_leafs()
1154  unhandled_block_indexes = set([i for i in range(len(leaf_blocks)) if leaf_blocks[i].get_density() > self.__density_threshold])
1155 
1156  current_block = self.__find_block_center(leaf_blocks, unhandled_block_indexes)
1157  cluster_index = 0
1158 
1159  while current_block is not None:
1160  if current_block.get_density() <= self.__density_threshold or len(current_block) <= self.__amount_threshold:
1161  break
1162 
1163  self.__expand_cluster_block(current_block, cluster_index, leaf_blocks, unhandled_block_indexes)
1164 
1165  current_block = self.__find_block_center(leaf_blocks, unhandled_block_indexes)
1166  cluster_index += 1
1167 
1168  self.__store_clustering_results(cluster_index, leaf_blocks)
1169 
1170 
1171  def __expand_cluster_block(self, block, cluster_index, leaf_blocks, unhandled_block_indexes):
1172  """!
1173  @brief Expand cluster from specific block that is considered as a central block.
1174 
1175  @param[in] block (bang_block): Block that is considered as a central block for cluster.
1176  @param[in] cluster_index (uint): Index of cluster that is assigned to blocks that forms new cluster.
1177  @param[in] leaf_blocks (list): Leaf BANG-blocks that are considered during cluster formation.
1178  @param[in] unhandled_block_indexes (set): Set of candidates (BANG block indexes) to become a cluster member. The
1179  parameter helps to reduce traversing among BANG-block providing only restricted set of block that
1180  should be considered.
1181 
1182  """
1183 
1184  block.set_cluster(cluster_index)
1185  self.__update_cluster_dendrogram(cluster_index, [block])
1186 
1187  neighbors = self.__find_block_neighbors(block, leaf_blocks, unhandled_block_indexes)
1188  self.__update_cluster_dendrogram(cluster_index, neighbors)
1189 
1190  for neighbor in neighbors:
1191  neighbor.set_cluster(cluster_index)
1192  neighbor_neighbors = self.__find_block_neighbors(neighbor, leaf_blocks, unhandled_block_indexes)
1193  self.__update_cluster_dendrogram(cluster_index, neighbor_neighbors)
1194 
1195  neighbors += neighbor_neighbors
1196 
1197 
1198  def __store_clustering_results(self, amount_clusters, leaf_blocks):
1199  """!
1200  @brief Stores clustering results in a convenient way.
1201 
1202  @param[in] amount_clusters (uint): Amount of cluster that was allocated during processing.
1203  @param[in] leaf_blocks (list): Leaf BANG-blocks (the smallest cells).
1204 
1205  """
1206  self.__clusters = [[] for _ in range(amount_clusters)]
1207  for block in leaf_blocks:
1208  index = block.get_cluster()
1209 
1210  if index is not None:
1211  self.__clusters[index] += block.get_points()
1212  else:
1213  self.__noise += block.get_points()
1214 
1215  self.__clusters = [ list(set(cluster)) for cluster in self.__clusters ]
1216  self.__noise = list(set(self.__noise))
1217 
1218 
1219  def __find_block_center(self, level_blocks, unhandled_block_indexes):
1220  """!
1221  @brief Search block that is cluster center for new cluster.
1222 
1223  @return (bang_block) Central block for new cluster, if cluster is not found then None value is returned.
1224 
1225  """
1226  for i in reversed(range(len(level_blocks))):
1227  if level_blocks[i].get_density() <= self.__density_threshold:
1228  return None
1229 
1230  if level_blocks[i].get_cluster() is None:
1231  unhandled_block_indexes.remove(i)
1232  return level_blocks[i]
1233 
1234  return None
1235 
1236 
1237  def __find_block_neighbors(self, block, level_blocks, unhandled_block_indexes):
1238  """!
1239  @brief Search block neighbors that are parts of new clusters (density is greater than threshold and that are
1240  not cluster members yet), other neighbors are ignored.
1241 
1242  @param[in] block (bang_block): BANG-block for which neighbors should be found (which can be part of cluster).
1243  @param[in] level_blocks (list): BANG-blocks on specific level.
1244  @param[in] unhandled_block_indexes (set): Blocks that have not been processed yet.
1245 
1246  @return (list) Block neighbors that can become part of cluster.
1247 
1248  """
1249  neighbors = []
1250 
1251  handled_block_indexes = []
1252  for unhandled_index in unhandled_block_indexes:
1253  if block.is_neighbor(level_blocks[unhandled_index]):
1254  handled_block_indexes.append(unhandled_index)
1255  neighbors.append(level_blocks[unhandled_index])
1256 
1257  # Maximum number of neighbors is eight
1258  if len(neighbors) == 8:
1259  break
1260 
1261  for handled_index in handled_block_indexes:
1262  unhandled_block_indexes.remove(handled_index)
1263 
1264  return neighbors
1265 
1266 
1267  def __update_cluster_dendrogram(self, index_cluster, blocks):
1268  """!
1269  @brief Append clustered blocks to dendrogram.
1270 
1271  @param[in] index_cluster (uint): Cluster index that was assigned to blocks.
1272  @param[in] blocks (list): Blocks that were clustered.
1273 
1274  """
1275  if len(self.__dendrogram) <= index_cluster:
1276  self.__dendrogram.append([])
1277 
1278  blocks = sorted(blocks, key=lambda block: block.get_density(), reverse=True)
1279  self.__dendrogram[index_cluster] += blocks
1280 
Common visualizer of clusters on 1D, 2D or 3D surface.
Definition: __init__.py:390
pyclustering module for cluster analysis.
Definition: __init__.py:1
def __update_cluster_dendrogram(self, index_cluster, blocks)
Append clustered blocks to dendrogram.
Definition: bang.py:1267
def get_clusters(self)
Returns allocated clusters.
Definition: bang.py:1058
def get_noise(self)
Returns allocated noise.
Definition: bang.py:1073
def set_cluster(self, index)
Assign cluster to the BANG-block by index.
Definition: bang.py:851
def __draw_clusters(self)
Display clusters and outliers using different colors.
Definition: bang.py:335
def __draw_block(self, block, block_alpha=0.0)
Display single BANG block on axis.
Definition: bang.py:299
def get_points(self)
Return points that covers by the BANG-block.
Definition: bang.py:838
def __calculate_volume(self)
Calculates volume of current spatial block.
Definition: bang.py:730
BANG-block that represent spatial region in data space.
Definition: bang.py:751
Utils that are used by modules of pyclustering.
Definition: __init__.py:1
Module for representing clustering results.
Definition: encoder.py:1
def is_neighbor(self, block)
Performs calculation to check whether specified block is neighbor to the current. ...
Definition: bang.py:861
def __expand_cluster_block(self, block, cluster_index, leaf_blocks, unhandled_block_indexes)
Expand cluster from specific block that is considered as a central block.
Definition: bang.py:1171
def get_height(self)
Returns height of BANG tree where blocks are stored.
Definition: bang.py:496
Colors used by pyclustering library for visualization.
Definition: color.py:1
def is_neighbor(self, block)
Performs calculation to identify whether specified block is neighbor of current block.
Definition: bang.py:686
Visualizer of BANG algorithm&#39;s results.
Definition: bang.py:48
def get_region(self)
Returns region number of BANG-block.
Definition: bang.py:795
def __init__(self, data, levels, kwargs)
Create BANG directory - basically tree structure with direct access to leafs.
Definition: bang.py:422
def __contains__(self, point)
Point is considered as contained if it lies in block (belong to it).
Definition: bang.py:630
def get_cluster_encoding(self)
Returns clustering result representation type that indicate how clusters are encoded.
Definition: bang.py:1114
def __get_amount_points(self)
Count covered points by the BANG-block and if cache is enable then covered points are stored...
Definition: bang.py:910
def __init__(self, data, region, level, space_block, cache_points=False)
Create BANG-block.
Definition: bang.py:756
def get_leafs(self)
Return leafs - the smallest blocks.
Definition: bang.py:472
Class implements BANG grid based clustering algorithm.
Definition: bang.py:954
def __create_directory(self)
Create BANG directory as a tree with separate storage for leafs.
Definition: bang.py:506
def __str__(self)
Returns string representation of BANG-block using region number and level where block is located...
Definition: bang.py:779
def __draw_cluster(self, data, cluster, color, marker)
Draw 2-D single cluster on axis using specified color and marker.
Definition: bang.py:348
def __find_block_center(self, level_blocks, unhandled_block_indexes)
Search block that is cluster center for new cluster.
Definition: bang.py:1219
def __allocate_clusters(self)
Performs cluster allocation using leafs of tree in BANG directory (the smallest cells).
Definition: bang.py:1148
def get_dendrogram(self)
Returns dendrogram of clusters.
Definition: bang.py:1102
Geometrical description of BANG block in data space.
Definition: bang.py:598
def __len__(self)
Returns block size defined by amount of points that are contained by this block.
Definition: bang.py:787
Provides service for creating 2-D animation using BANG clustering results.
Definition: bang.py:223
def get_corners(self)
Return spatial description of current block.
Definition: bang.py:644
def __split_block(self, block, split_dimension, cache_require, current_level_blocks)
Split specific block in specified dimension.
Definition: bang.py:577
def __increment_block(self)
Increment BANG block safely by updating block index, level and level block.
Definition: bang.py:285
def get_data(self)
Return data that is stored in the directory.
Definition: bang.py:462
def __str__(self)
Returns string block description.
Definition: bang.py:620
def get_directory(self)
Returns grid directory that describes grid of the processed data.
Definition: bang.py:1088
def show_blocks(directory)
Show BANG-blocks (leafs only) in data space.
Definition: bang.py:59
def __find_block_neighbors(self, block, level_blocks, unhandled_block_indexes)
Search block neighbors that are parts of new clusters (density is greater than threshold and that are...
Definition: bang.py:1237
def __calculate_density(self, amount_points)
Calculates BANG-block density.
Definition: bang.py:894
def __calculate_neighborhood(self, block_max_corner)
Calculates neighborhood score that defined whether blocks are neighbors.
Definition: bang.py:707
def __init__(self, data, levels, ccore=False, kwargs)
Create BANG clustering algorithm.
Definition: bang.py:1002
def __build_level(self, previous_level_blocks, level)
Build new level of directory.
Definition: bang.py:553
def __store_level_blocks(self, level_blocks)
Store level blocks if observing is enabled.
Definition: bang.py:525
def __init__(self, directory, clusters)
Creates BANG animator instance.
Definition: bang.py:253
def __cache_covered_data(self)
Cache covered data.
Definition: bang.py:926
def __validate_arguments(self)
Check correctness of input arguments and throw exception if incorrect is found.
Definition: bang.py:276
def get_volume(self)
Returns volume of current block.
Definition: bang.py:654
def split(self, dimension)
Split current block into two spatial blocks in specified dimension.
Definition: bang.py:666
def get_density(self)
Returns density of the BANG-block.
Definition: bang.py:807
def __build_directory_levels(self)
Build levels of direction if amount of level is greater than one.
Definition: bang.py:538
def animate(self, animation_velocity=75, movie_fps=25, movie_filename=None)
Animates clustering process that is performed by BANG algorithm.
Definition: bang.py:357
def show_clusters(data, clusters, noise=None)
Display BANG clustering results.
Definition: bang.py:119
def __init__(self, max_corner, min_corner)
Creates spatial block in data space.
Definition: bang.py:607
def __len__(self)
Returns amount of blocks that is stored in the directory.
Definition: bang.py:452
def __draw_leaf_density(self)
Display densities by filling blocks by appropriate colors.
Definition: bang.py:320
def split(self, split_dimension, cache_points)
Split BANG-block into two new blocks in specified dimension.
Definition: bang.py:873
def show_dendrogram(dendrogram)
Display dendrogram of BANG-blocks.
Definition: bang.py:89
def process(self)
Performs clustering process in line with rules of BANG clustering algorithm.
Definition: bang.py:1038
def __validate_arguments(self)
Check input arguments of BANG algorithm and if one of them is not correct then appropriate exception ...
Definition: bang.py:1127
BANG directory stores BANG-blocks that represents grid in data space.
Definition: bang.py:415
def get_level(self, level)
Returns BANG blocks on the specific level.
Definition: bang.py:484
def __cache_point(self, index)
Store index points.
Definition: bang.py:939
def __store_clustering_results(self, amount_clusters, leaf_blocks)
Stores clustering results in a convenient way.
Definition: bang.py:1198
def get_spatial_block(self)
Return spatial block - BANG-block description in data space.
Definition: bang.py:828
def get_cluster(self)
Return index of cluster to which the BANG-block belongs to.
Definition: bang.py:817