bang.py
1 """!
2 
3 @brief Cluster analysis algorithm: BANG.
4 @details Implementation based on paper @cite inproceedings::bang::1.
5 
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2019
8 @copyright GNU Public License
9 
10 @cond GNU_PUBLIC_LICENSE
11  PyClustering is free software: you can redistribute it and/or modify
12  it under the terms of the GNU General Public License as published by
13  the Free Software Foundation, either version 3 of the License, or
14  (at your option) any later version.
15 
16  PyClustering is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  GNU General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23 @endcond
24 
25 """
26 
27 import itertools
28 import warnings
29 
30 try:
31  import matplotlib
32  import matplotlib.gridspec as gridspec
33  import matplotlib.pyplot as plt
34  import matplotlib.patches as patches
35  import matplotlib.animation as animation
36 except Exception as error_instance:
37  warnings.warn("Impossible to import matplotlib (please, install 'matplotlib'), pyclustering's visualization "
38  "functionality is not available (details: '%s')." % str(error_instance))
39 
40 from pyclustering.cluster import cluster_visualizer
41 from pyclustering.cluster.encoder import type_encoding
42 
43 from pyclustering.utils import data_corners
44 from pyclustering.utils.color import color as color_list
45 
46 
47 
49  """!
50  @brief Visualizer of BANG algorithm's results.
51  @details BANG visualizer provides visualization services that are specific for BANG algorithm.
52 
53  """
54 
55  __maximum_density_alpha = 0.6
56 
57 
58  @staticmethod
59  def show_blocks(directory):
60  """!
61  @brief Show BANG-blocks (leafs only) in data space.
62  @details BANG-blocks represents grid that was used for clustering process.
63 
64  @param[in] directory (bang_directory): Directory that was created by BANG algorithm during clustering process.
65 
66  """
67 
68  dimension = len(directory.get_data()[0])
69 
70  amount_canvases = 1
71  if dimension > 1:
72  amount_canvases = int(dimension * (dimension - 1) / 2)
73 
74  figure = plt.figure()
75  grid_spec = gridspec.GridSpec(1, amount_canvases)
76 
77  pairs = list(itertools.combinations(range(dimension), 2))
78  if len(pairs) == 0: pairs = [(0, 0)]
79 
80  for index in range(amount_canvases):
81  ax = figure.add_subplot(grid_spec[index])
82  bang_visualizer.__draw_blocks(ax, directory.get_leafs(), pairs[index])
83  bang_visualizer.__draw_two_dimension_data(ax, directory.get_data(), pairs[index])
84 
85  plt.show()
86 
87 
88  @staticmethod
89  def show_dendrogram(dendrogram):
90  """!
91  @brief Display dendrogram of BANG-blocks.
92 
93  @param[in] dendrogram (list): List representation of dendrogram of BANG-blocks.
94 
95  @see bang.get_dendrogram()
96 
97  """
98  plt.figure()
99  axis = plt.subplot(1, 1, 1)
100 
101  current_position = 0
102  for index_cluster in range(len(dendrogram)):
103  densities = [ block.get_density() for block in dendrogram[index_cluster] ]
104  xrange = range(current_position, current_position + len(densities))
105 
106  axis.bar(xrange, densities, 1.0, linewidth=0.0, color=color_list.get_color(index_cluster))
107 
108  current_position += len(densities)
109 
110  axis.set_ylabel("density")
111  axis.set_xlabel("block")
112  axis.xaxis.set_ticklabels([])
113 
114  plt.xlim([-0.5, current_position - 0.5])
115  plt.show()
116 
117 
118  @staticmethod
119  def show_clusters(data, clusters, noise=None):
120  """!
121  @brief Display BANG clustering results.
122 
123  @param[in] data (list): Dataset that was used for clustering.
124  @param[in] clusters (array_like): Clusters that were allocated by the algorithm.
125  @param[in] noise (array_like): Noise that were allocated by the algorithm.
126 
127  """
128  visualizer = cluster_visualizer()
129  visualizer.append_clusters(clusters, data)
130  visualizer.append_cluster(noise or [], data, marker='x')
131  visualizer.show()
132 
133 
134  @staticmethod
135  def __draw_two_dimension_data(ax, data, pair):
136  """!
137  @brief Display data in two-dimensional canvas.
138 
139  @param[in] ax (Axis): Canvas where data should be displayed.
140  @param[in] data (list): Data points that should be displayed.
141  @param[in] pair (tuple): Pair of dimension indexes.
142 
143  """
144  ax.set_xlabel("x%d" % pair[0])
145  ax.set_ylabel("x%d" % pair[1])
146 
147  for point in data:
148  if len(data[0]) > 1:
149  ax.plot(point[pair[0]], point[pair[1]], color='red', marker='.')
150  else:
151  ax.plot(point[pair[0]], 0, color='red', marker='.')
152  ax.yaxis.set_ticklabels([])
153 
154 
155  @staticmethod
156  def __draw_blocks(ax, blocks, pair):
157  """!
158  @brief Display BANG-blocks on specified figure.
159 
160  @param[in] ax (Axis): Axis where bang-blocks should be displayed.
161  @param[in] blocks (list): List of blocks that should be displyed.
162  @param[in] pair (tuple): Pair of coordinate index that should be displayed.
163 
164  """
165  ax.grid(False)
166 
167  density_scale = blocks[-1].get_density()
168  for block in blocks:
169  bang_visualizer.__draw_block(ax, pair, block, density_scale)
170 
171 
172  @staticmethod
173  def __draw_block(ax, pair, block, density_scale):
174  """!
175  @brief Display BANG-block on the specified ax.
176 
177  @param[in] ax (Axis): Axis where block should be displayed.
178  @param[in] pair (tuple): Pair of coordinate index that should be displayed.
179  @param[in] block (bang_block): BANG-block that should be displayed.
180  @param[in] density_scale (double): Max density to display density of the block by appropriate tone.
181 
182  """
183  max_corner, min_corner = bang_visualizer.__get_rectangle_description(block, pair)
184 
185  belong_cluster = block.get_cluster() is not None
186 
187  if density_scale != 0.0:
188  density_scale = bang_visualizer.__maximum_density_alpha * block.get_density() / density_scale
189 
190  face_color = matplotlib.colors.to_rgba('blue', alpha=density_scale)
191  edge_color = matplotlib.colors.to_rgba('black', alpha=1.0)
192 
193  rect = patches.Rectangle(min_corner, max_corner[0] - min_corner[0], max_corner[1] - min_corner[1],
194  fill=belong_cluster,
195  facecolor=face_color,
196  edgecolor=edge_color,
197  linewidth=0.5)
198  ax.add_patch(rect)
199 
200 
201  @staticmethod
202  def __get_rectangle_description(block, pair):
203  """!
204  @brief Create rectangle description for block in specific dimension.
205 
206  @param[in] pair (tuple): Pair of coordinate index that should be displayed.
207  @param[in] block (bang_block): BANG-block that should be displayed
208 
209  @return (tuple) Pair of corners that describes rectangle.
210 
211  """
212  max_corner, min_corner = block.get_spatial_block().get_corners()
213 
214  max_corner = [max_corner[pair[0]], max_corner[pair[1]]]
215  min_corner = [min_corner[pair[0]], min_corner[pair[1]]]
216 
217  if pair == (0, 0):
218  max_corner[1], min_corner[1] = 1.0, -1.0
219 
220  return max_corner, min_corner
221 
222 
224  """!
225  @brief Provides service for creating 2-D animation using BANG clustering results.
226  @details The animator does not support visualization of clustering process where non 2-dimensional was used.
227 
228  Code example of animation of BANG clustering process:
229  @code
230  from pyclustering.cluster.bang import bang, bang_animator
231  from pyclustering.utils import read_sample
232  from pyclustering.samples.definitions import FCPS_SAMPLES
233 
234  # Read data two dimensional data.
235  data = read_sample(FCPS_SAMPLES.SAMPLE_LSUN)
236 
237  # Create instance of BANG algorithm.
238  bang_instance = bang(data, 9)
239  bang_instance.process()
240 
241  # Obtain clustering results.
242  clusters = bang_instance.get_clusters()
243  noise = bang_instance.get_noise()
244  directory = bang_instance.get_directory()
245 
246  # Create BANG animation using class 'bang_animator':
247  animator = bang_animator(directory, clusters)
248  animator.animate()
249  @endcode
250 
251 
252  """
253  def __init__(self, directory, clusters):
254  """!
255  @brief Creates BANG animator instance.
256 
257  @param[in] directory (bang_directory): BANG directory that was formed during BANG clustering process.
258  @param[in] clusters (list): Allocated clusters during BANG clustering process.
259 
260  """
261  self.__directory = directory
262  self.__clusters = clusters
263  self.__noise = []
264 
265  self.__current_block = 0
266  self.__current_level = 0
267  self.__level_blocks = directory.get_level(0)
268 
269  self.__figure = plt.figure()
270  self.__ax = self.__figure.add_subplot(1, 1, 1)
271  self.__special_frame = 0
272 
273  self.__validate_arguments()
274 
275 
276  def __validate_arguments(self):
277  """!
278  @brief Check correctness of input arguments and throw exception if incorrect is found.
279 
280  """
281  if len(self.__directory.get_data()[0]) != 2:
282  raise ValueError("Impossible to animate BANG clustering process for non 2D data.")
283 
284 
285  def __increment_block(self):
286  """!
287  @brief Increment BANG block safely by updating block index, level and level block.
288 
289  """
290  self.__current_block += 1
291  if self.__current_block >= len(self.__level_blocks):
292  self.__current_block = 0
293  self.__current_level += 1
294 
295  if self.__current_level < self.__directory.get_height():
296  self.__level_blocks = self.__directory.get_level(self.__current_level)
297 
298 
299  def __draw_block(self, block, block_alpha=0.0):
300  """!
301  @brief Display single BANG block on axis.
302 
303  @param[in] block (bang_block): BANG block that should be displayed.
304  @param[in] block_alpha (double): Transparency level - value of alpha.
305 
306  """
307  max_corner, min_corner = block.get_spatial_block().get_corners()
308 
309  face_color = matplotlib.colors.to_rgba('blue', alpha=block_alpha)
310  edge_color = matplotlib.colors.to_rgba('black', alpha=1.0)
311 
312  rect = patches.Rectangle(min_corner, max_corner[0] - min_corner[0], max_corner[1] - min_corner[1],
313  fill=True,
314  facecolor=face_color,
315  edgecolor=edge_color,
316  linewidth=0.5)
317  self.__ax.add_patch(rect)
318 
319 
320  def __draw_leaf_density(self):
321  """!
322  @brief Display densities by filling blocks by appropriate colors.
323 
324  """
325  leafs = self.__directory.get_leafs()
326  density_scale = leafs[-1].get_density()
327 
328  if density_scale == 0.0: density_scale = 1.0
329 
330  for block in leafs:
331  alpha = 0.8 * block.get_density() / density_scale
332  self.__draw_block(block, alpha)
333 
334 
335  def __draw_clusters(self):
336  """!
337  @brief Display clusters and outliers using different colors.
338 
339  """
340  data = self.__directory.get_data()
341  for index_cluster in range(len(self.__clusters)):
342  color = color_list.get_color(index_cluster)
343  self.__draw_cluster(data, self.__clusters[index_cluster], color, '.')
344 
345  self.__draw_cluster(self.__directory.get_data(), self.__noise, 'gray', 'x')
346 
347 
348  def __draw_cluster(self, data, cluster, color, marker):
349  """!
350  @brief Draw 2-D single cluster on axis using specified color and marker.
351 
352  """
353  for item in cluster:
354  self.__ax.plot(data[item][0], data[item][1], color=color, marker=marker)
355 
356 
357  def animate(self, animation_velocity=75, movie_fps=25, movie_filename=None):
358  """!
359  @brief Animates clustering process that is performed by BANG algorithm.
360 
361  @param[in] animation_velocity (uint): Interval between frames in milliseconds (for run-time animation only).
362  @param[in] movie_fps (uint): Defines frames per second (for rendering movie only).
363  @param[in] movie_filename (string): If it is specified then animation will be stored to file that is specified in this parameter.
364 
365  """
366  def init_frame():
367  self.__figure.clf()
368  self.__ax = self.__figure.add_subplot(1, 1, 1)
369  self.__figure.suptitle("BANG algorithm", fontsize=18, fontweight='bold')
370 
371  for point in self.__directory.get_data():
372  self.__ax.plot(point[0], point[1], color='red', marker='.')
373 
374  return frame_generation(0)
375 
376 
377  def frame_generation(index_iteration):
378  if self.__current_level < self.__directory.get_height():
379  block = self.__level_blocks[self.__current_block]
380  self.__draw_block(block)
381  self.__increment_block()
382 
383  else:
384  if self.__special_frame == 0:
385  self.__draw_leaf_density()
386 
387  elif self.__special_frame == 15:
388  self.__draw_clusters()
389 
390  elif self.__special_frame == 30:
391  self.__figure.clf()
392  self.__ax = self.__figure.add_subplot(1, 1, 1)
393  self.__figure.suptitle("BANG algorithm", fontsize=18, fontweight='bold')
394 
395  self.__draw_clusters()
396 
397  self.__special_frame += 1
398 
399 
400 
401  iterations = len(self.__directory) + 60
402  # print("Total number of iterations: %d" % iterations)
403  cluster_animation = animation.FuncAnimation(self.__figure, frame_generation, iterations,
404  interval=animation_velocity,
405  init_func=init_frame,
406  repeat_delay=5000)
407 
408  if movie_filename is not None:
409  cluster_animation.save(movie_filename, writer = 'ffmpeg', fps = movie_fps, bitrate = 3500)
410  else:
411  plt.show()
412 
413 
414 
416  """!
417  @brief BANG directory stores BANG-blocks that represents grid in data space.
418  @details The directory build BANG-blocks in binary tree manner. Leafs of the tree stored separately to provide
419  a direct access to the leafs that should be analysed. Leafs cache data-points.
420 
421  """
422  def __init__(self, data, levels, **kwargs):
423  """!
424  @brief Create BANG directory - basically tree structure with direct access to leafs.
425 
426  @param[in] data (list): Input data that is clustered.
427  @param[in] levels (uint): Height of the blocks tree.
428  @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'observe').
429 
430  <b>Keyword Args:</b><br>
431  - observe (bool): If 'True' then blocks on each level are stored.
432  - density_threshold (double): The lowest level of density when contained data in bang-block is
433  considered as a noise and there is no need to split it till the last level. Be aware that this
434  parameter is used with 'amount_threshold' parameter.
435  - amount_threshold (uint): Amount of points in the block when it contained data in bang-block is
436  considered as a noise and there is no need to split it till the last level.
437 
438  """
439  self.__data = data
440  self.__levels = levels
441  self.__density_threshold = kwargs.get('density_threshold', 0.0)
442  self.__amount_density = kwargs.get('amount_threshold', 0)
443  self.__leafs = []
444  self.__root = None
445  self.__level_blocks = []
446  self.__size = 0
447  self.__observe = kwargs.get('observe', True)
448 
449  self.__create_directory()
450 
451 
452  def __len__(self):
453  """!
454  @brief Returns amount of blocks that is stored in the directory
455 
456  @return (uint) Amount of blocks in the BANG directory.
457 
458  """
459  return self.__size
460 
461 
462  def get_data(self):
463  """!
464  @brief Return data that is stored in the directory.
465 
466  @return (list) List of points that represents stored data.
467 
468  """
469  return self.__data
470 
471 
472  def get_leafs(self):
473  """!
474  @brief Return leafs - the smallest blocks.
475  @details Some leafs can be bigger than others because splitting is not performed for blocks whose density is
476  less than threshold.
477 
478  @return (list) List of blocks that are leafs of BANG directory.
479 
480  """
481  return self.__leafs
482 
483 
484  def get_level(self, level):
485  """!
486  @brief Returns BANG blocks on the specific level.
487 
488  @param[in] level (uint): Level of tree where BANG blocks are located.
489 
490  @return (list) List of BANG blocks on the specific level.
491 
492  """
493  return self.__level_blocks[level]
494 
495 
496  def get_height(self):
497  """!
498  @brief Returns height of BANG tree where blocks are stored.
499 
500  @return (uint) Height of BANG tree.
501 
502  """
503  return len(self.__level_blocks)
504 
505 
506  def __create_directory(self):
507  """!
508  @brief Create BANG directory as a tree with separate storage for leafs.
509 
510  """
511 
512  min_corner, max_corner = data_corners(self.__data)
513  data_block = spatial_block(max_corner, min_corner)
514 
515  cache_require = (self.__levels == 1)
516  self.__root = bang_block(self.__data, 0, 0, data_block, cache_require)
517 
518  if cache_require:
519  self.__leafs.append(self.__root)
520  self.__store_level_blocks([self.__root])
521  else:
523 
524 
525  def __store_level_blocks(self, level_blocks):
526  """!
527  @brief Store level blocks if observing is enabled.
528 
529  @param[in] level_blocks (list): Created blocks on a new level.
530 
531  """
532  self.__size += len(level_blocks)
533  if self.__observe is True:
534  self.__level_blocks.append(level_blocks)
535 
536 
537 
538  def __build_directory_levels(self):
539  """!
540  @brief Build levels of direction if amount of level is greater than one.
541 
542  """
543 
544  previous_level_blocks = [ self.__root ]
545 
546  for level in range(1, self.__levels):
547  previous_level_blocks = self.__build_level(previous_level_blocks, level)
548  self.__store_level_blocks(previous_level_blocks)
549 
550  self.__leafs = sorted(self.__leafs, key=lambda block: block.get_density())
551 
552 
553  def __build_level(self, previous_level_blocks, level):
554  """!
555  @brief Build new level of directory.
556 
557  @param[in] previous_level_blocks (list): BANG-blocks on the previous level.
558  @param[in] level (uint): Level number that should be built.
559 
560  @return (list) New block on the specified level.
561 
562  """
563  current_level_blocks = []
564 
565  split_dimension = level % len(self.__data[0])
566  cache_require = (level == self.__levels - 1)
567 
568  for block in previous_level_blocks:
569  self.__split_block(block, split_dimension, cache_require, current_level_blocks)
570 
571  if cache_require:
572  self.__leafs += current_level_blocks
573 
574  return current_level_blocks
575 
576 
577  def __split_block(self, block, split_dimension, cache_require, current_level_blocks):
578  """!
579  @brief Split specific block in specified dimension.
580  @details Split is not performed for block whose density is lower than threshold value, such blocks are putted to
581  leafs.
582 
583  @param[in] block (bang_block): BANG-block that should be split.
584  @param[in] split_dimension (uint): Dimension at which splitting should be performed.
585  @param[in] cache_require (bool): Defines when points in cache should be stored during density calculation.
586  @param[in|out] current_level_blocks (list): Block storage at the current level where new blocks should be added.
587 
588  """
589  if block.get_density() <= self.__density_threshold or len(block) <= self.__amount_density:
590  self.__leafs.append(block)
591 
592  else:
593  left, right = block.split(split_dimension, cache_require)
594  current_level_blocks.append(left)
595  current_level_blocks.append(right)
596 
597 
598 
600  """!
601  @brief Geometrical description of BANG block in data space.
602  @details Provides services related to spatial functionality and used by bang_block
603 
604  @see bang_block
605 
606  """
607 
608  def __init__(self, max_corner, min_corner):
609  """!
610  @brief Creates spatial block in data space.
611 
612  @param[in] max_corner (array_like): Maximum corner coordinates of the block.
613  @param[in] min_corner (array_like): Minimal corner coordinates of the block.
614 
615  """
616  self.__max_corner = max_corner
617  self.__min_corner = min_corner
618  self.__volume = self.__calculate_volume()
619 
620 
621  def __str__(self):
622  """!
623  @brief Returns string block description.
624 
625  @return String representation of the block.
626 
627  """
628  return "(max: %s; min: %s)" % (self.__max_corner, self.__min_corner)
629 
630 
631  def __contains__(self, point):
632  """!
633  @brief Point is considered as contained if it lies in block (belong to it).
634 
635  @return (bool) True if point is in block, otherwise False.
636 
637  """
638  for i in range(len(point)):
639  if point[i] < self.__min_corner[i] or point[i] > self.__max_corner[i]:
640  return False
641 
642  return True
643 
644 
645  def get_corners(self):
646  """!
647  @brief Return spatial description of current block.
648 
649  @return (tuple) Pair of maximum and minimum corners (max_corner, min_corner).
650 
651  """
652  return self.__max_corner, self.__min_corner
653 
654 
655  def get_volume(self):
656  """!
657  @brief Returns volume of current block.
658  @details Volume block has uncommon mining here: for 1D is length of a line, for 2D is square of rectangle,
659  for 3D is volume of 3D figure, and for ND is volume of ND figure.
660 
661  @return (double) Volume of current block.
662 
663  """
664  return self.__volume
665 
666 
667  def split(self, dimension):
668  """!
669  @brief Split current block into two spatial blocks in specified dimension.
670 
671  @param[in] dimension (uint): Dimension where current block should be split.
672 
673  @return (tuple) Pair of new split blocks from current block.
674 
675  """
676  first_max_corner = self.__max_corner[:]
677  second_min_corner = self.__min_corner[:]
678 
679  split_border = (self.__max_corner[dimension] + self.__min_corner[dimension]) / 2.0
680 
681  first_max_corner[dimension] = split_border
682  second_min_corner[dimension] = split_border
683 
684  return spatial_block(first_max_corner, self.__min_corner), spatial_block(self.__max_corner, second_min_corner)
685 
686 
687  def is_neighbor(self, block):
688  """!
689  @brief Performs calculation to identify whether specified block is neighbor of current block.
690  @details It also considers diagonal blocks as neighbors.
691 
692  @param[in] block (spatial_block): Another block that is check whether it is neighbor.
693 
694  @return (bool) True is blocks are neighbors, False otherwise.
695 
696  """
697  if block is not self:
698  block_max_corner, _ = block.get_corners()
699  dimension = len(block_max_corner)
700  neighborhood_score = self.__calculate_neighborhood(block_max_corner)
701 
702  if neighborhood_score == dimension:
703  return True
704 
705  return False
706 
707 
708  def __calculate_neighborhood(self, block_max_corner):
709  """!
710  @brief Calculates neighborhood score that defined whether blocks are neighbors.
711 
712  @param[in] block_max_corner (list): Maximum coordinates of other block.
713 
714  @return (uint) Neighborhood score.
715 
716  """
717  dimension = len(block_max_corner)
718 
719  length_edges = [self.__max_corner[i] - self.__min_corner[i] for i in range(dimension)]
720 
721  neighborhood_score = 0
722  for i in range(dimension):
723  diff = abs(block_max_corner[i] - self.__max_corner[i])
724 
725  if diff <= length_edges[i] + length_edges[i] * 0.0001:
726  neighborhood_score += 1
727 
728  return neighborhood_score
729 
730 
731  def __calculate_volume(self):
732  """!
733  @brief Calculates volume of current spatial block.
734  @details If empty dimension is detected (where all points has the same value) then such dimension is ignored
735  during calculation of volume.
736 
737  @return (double) Volume of current spatial block.
738 
739  """
740 
741  volume = 0.0
742  for i in range(0, len(self.__max_corner)):
743  side_length = self.__max_corner[i] - self.__min_corner[i]
744 
745  if side_length != 0.0:
746  if volume == 0.0: volume = side_length
747  else: volume *= side_length
748 
749  return volume
750 
751 
753  """!
754  @brief BANG-block that represent spatial region in data space.
755 
756  """
757  def __init__(self, data, region, level, space_block, cache_points=False):
758  """!
759  @brief Create BANG-block.
760 
761  @param[in] data (list): List of points that are processed.
762  @param[in] region (uint): Region number - unique value on a level.
763  @param[in] level (uint): Level number where block is created.
764  @param[in] space_block (spatial_block): Spatial block description in data space.
765  @param[in] cache_points (bool): if True then points are stored in memory (used for leaf blocks).
766 
767  """
768  self.__data = data
769  self.__region_number = region
770  self.__level = level
771  self.__spatial_block = space_block
772  self.__cache_points = cache_points
773 
774  self.__cluster = None
775  self.__points = None
778 
779 
780  def __str__(self):
781  """!
782  @brief Returns string representation of BANG-block using region number and level where block is located.
783 
784  """
785  return "(" + str(self.__region_number) + ", " + str(self.__level) + ")"
786 
787 
788  def __len__(self):
789  """!
790  @brief Returns block size defined by amount of points that are contained by this block.
791 
792  """
793  return self.__amount_points
794 
795 
796  def get_region(self):
797  """!
798  @brief Returns region number of BANG-block.
799  @details Region number is unique on among region numbers on a directory level. Pair of region number and level
800  is unique for all directory.
801 
802  @return (uint) Region number.
803 
804  """
805  return self.__region_number
806 
807 
808  def get_density(self):
809  """!
810  @brief Returns density of the BANG-block.
811 
812  @return (double) BANG-block density.
813 
814  """
815  return self.__density
816 
817 
818  def get_cluster(self):
819  """!
820  @brief Return index of cluster to which the BANG-block belongs to.
821  @details Index of cluster may have None value if the block was not assigned to any cluster.
822 
823  @return (uint) Index of cluster or None if the block does not belong to any cluster.
824 
825  """
826  return self.__cluster
827 
828 
829  def get_spatial_block(self):
830  """!
831  @brief Return spatial block - BANG-block description in data space.
832 
833  @return (spatial_block) Spatial block of the BANG-block.
834 
835  """
836  return self.__spatial_block
837 
838 
839  def get_points(self):
840  """!
841  @brief Return points that covers by the BANG-block.
842 
843  @return (list) List of point indexes that are covered by the block.
844 
845  """
846  if self.__points is None:
847  self.__cache_covered_data()
848 
849  return self.__points
850 
851 
852  def set_cluster(self, index):
853  """!
854  @brief Assign cluster to the BANG-block by index.
855 
856  @param[in] index (uint): Index cluster that is assigned to BANG-block.
857 
858  """
859  self.__cluster = index
860 
861 
862  def is_neighbor(self, block):
863  """!
864  @brief Performs calculation to check whether specified block is neighbor to the current.
865 
866  @param[in] block (bang_block): Other BANG-block that should be checked for neighborhood.
867 
868  @return (bool) True if blocks are neighbors, False if blocks are not neighbors.
869 
870  """
871  return self.get_spatial_block().is_neighbor(block.get_spatial_block())
872 
873 
874  def split(self, split_dimension, cache_points):
875  """!
876  @brief Split BANG-block into two new blocks in specified dimension.
877 
878  @param[in] split_dimension (uint): Dimension where block should be split.
879  @param[in] cache_points (bool): If True then covered points are cached. Used for leaf blocks.
880 
881  @return (tuple) Pair of BANG-block that were formed from the current.
882 
883  """
884  left_region_number = self.__region_number
885  right_region_number = self.__region_number + 2 ** self.__level
886 
887  first_spatial_block, second_spatial_block = self.__spatial_block.split(split_dimension)
888 
889  left = bang_block(self.__data, left_region_number, self.__level + 1, first_spatial_block, cache_points)
890  right = bang_block(self.__data, right_region_number, self.__level + 1, second_spatial_block, cache_points)
891 
892  return left, right
893 
894 
895  def __calculate_density(self, amount_points):
896  """!
897  @brief Calculates BANG-block density.
898 
899  @param[in] amount_points (uint): Amount of points in block.
900 
901  @return (double) BANG-block density.
902 
903  """
904  volume = self.__spatial_block.get_volume()
905  if volume != 0.0:
906  return amount_points / volume
907 
908  return 0.0
909 
910 
911  def __get_amount_points(self):
912  """!
913  @brief Count covered points by the BANG-block and if cache is enable then covered points are stored.
914 
915  @return (uint) Amount of covered points.
916 
917  """
918  amount = 0
919  for index in range(len(self.__data)):
920  if self.__data[index] in self.__spatial_block:
921  self.__cache_point(index)
922  amount += 1
923 
924  return amount
925 
926 
927  def __cache_covered_data(self):
928  """!
929  @brief Cache covered data.
930 
931  """
932  self.__cache_points = True
933  self.__points = []
934 
935  for index_point in range(len(self.__data)):
936  if self.__data[index_point] in self.__spatial_block:
937  self.__cache_point(index_point)
938 
939 
940  def __cache_point(self, index):
941  """!
942  @brief Store index points.
943 
944  @param[in] index (uint): Index point that should be stored.
945 
946  """
947  if self.__cache_points:
948  if self.__points is None:
949  self.__points = []
950 
951  self.__points.append(index)
952 
953 
954 
955 class bang:
956  """!
957  @brief Class implements BANG grid based clustering algorithm.
958  @details BANG clustering algorithms uses a multidimensional grid structure to organize the value space surrounding
959  the pattern values. The patterns are grouped into blocks and clustered with respect to the blocks by
960  a topological neighbor search algorithm @cite inproceedings::bang::1.
961 
962  Code example of BANG usage:
963  @code
964  from pyclustering.cluster.bang import bang, bang_visualizer
965  from pyclustering.utils import read_sample
966  from pyclustering.samples.definitions import FCPS_SAMPLES
967 
968  # Read data three dimensional data.
969  data = read_sample(FCPS_SAMPLES.SAMPLE_CHAINLINK)
970 
971  # Prepare algorithm's parameters.
972  levels = 11
973 
974  # Create instance of BANG algorithm.
975  bang_instance = bang(data, levels)
976  bang_instance.process()
977 
978  # Obtain clustering results.
979  clusters = bang_instance.get_clusters()
980  noise = bang_instance.get_noise()
981  directory = bang_instance.get_directory()
982  dendrogram = bang_instance.get_dendrogram()
983 
984  # Visualize BANG clustering results.
985  bang_visualizer.show_blocks(directory)
986  bang_visualizer.show_dendrogram(dendrogram)
987  bang_visualizer.show_clusters(data, clusters, noise)
988  @endcode
989 
990  There is visualization of BANG-clustering of three-dimensional data 'chainlink'. BANG-blocks that were formed during
991  processing are shown on following figure. The darkest color means highest density, blocks that does not cover points
992  are transparent:
993  @image html bang_blocks_chainlink.png "Fig. 1. BANG-blocks that cover input data."
994 
995  Here is obtained dendrogram that can be used for further analysis to improve clustering results:
996  @image html bang_dendrogram_chainlink.png "Fig. 2. BANG dendrogram where the X-axis contains BANG-blocks, the Y-axis contains density."
997 
998  BANG clustering result of 'chainlink' data:
999  @image html bang_clustering_chainlink.png "Fig. 3. BANG clustering result. Data: 'chainlink'."
1000 
1001  """
1002 
1003  def __init__(self, data, levels, ccore=False, **kwargs):
1004  """!
1005  @brief Create BANG clustering algorithm.
1006 
1007  @param[in] data (list): Input data (list of points) that should be clustered.
1008  @param[in] levels (uint): Amount of levels in tree that is used for splitting (how many times block should be
1009  split). For example, if amount of levels is two then surface will be divided into two blocks and
1010  each obtained block will be divided into blocks also.
1011  @param[in] ccore (bool): Reserved positional argument - not used yet.
1012  @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'observe').
1013 
1014  <b>Keyword Args:</b><br>
1015  - density_threshold (double): If block density is smaller than this value then contained data by this
1016  block is considered as a noise and its points as outliers. Block density is defined by amount of
1017  points in block divided by block volume: <i>amount_block_points</i>/<i>block_volume</i>. By default
1018  it is 0.0 - means than only empty blocks are considered as noise. Be aware that this parameter is used
1019  with parameter 'amount_threshold' - the maximum threshold is considered during processing.
1020  - amount_threshold (uint): Amount of points in the block when it contained data in bang-block is
1021  considered as a noise and there is no need to split it till the last level. Be aware that this parameter
1022  is used with parameter 'density_threshold' - the maximum threshold is considered during processing.
1023 
1024  """
1025  self.__data = data
1026  self.__levels = levels
1027  self.__directory = None
1028  self.__clusters = []
1029  self.__noise = []
1030  self.__cluster_blocks = []
1031  self.__dendrogram = []
1032  self.__density_threshold = kwargs.get('density_threshold', 0.0)
1033  self.__amount_threshold = kwargs.get('amount_threshold', 0)
1034  self.__ccore = ccore
1035 
1036  self.__validate_arguments()
1037 
1038 
1039  def process(self):
1040  """!
1041  @brief Performs clustering process in line with rules of BANG clustering algorithm.
1042 
1043  @return (bang) Returns itself (BANG instance).
1044 
1045  @see get_clusters()
1046  @see get_noise()
1047  @see get_directory()
1048  @see get_dendrogram()
1049 
1050  """
1051  self.__directory = bang_directory(self.__data, self.__levels,
1052  density_threshold=self.__density_threshold,
1053  amount_threshold=self.__amount_threshold)
1054  self.__allocate_clusters()
1055 
1056  return self
1057 
1058 
1059  def get_clusters(self):
1060  """!
1061  @brief Returns allocated clusters.
1062 
1063  @remark Allocated clusters are returned only after data processing (method process()). Otherwise empty list is returned.
1064 
1065  @return (list) List of allocated clusters, each cluster contains indexes of objects in list of data.
1066 
1067  @see process()
1068  @see get_noise()
1069 
1070  """
1071  return self.__clusters
1072 
1073 
1074  def get_noise(self):
1075  """!
1076  @brief Returns allocated noise.
1077 
1078  @remark Allocated noise is returned only after data processing (method process()). Otherwise empty list is returned.
1079 
1080  @return (list) List of indexes that are marked as a noise.
1081 
1082  @see process()
1083  @see get_clusters()
1084 
1085  """
1086  return self.__noise
1087 
1088 
1089  def get_directory(self):
1090  """!
1091  @brief Returns grid directory that describes grid of the processed data.
1092 
1093  @remark Grid directory is returned only after data processing (method process()). Otherwise None value is returned.
1094 
1095  @return (bang_directory) BANG directory that describes grid of process data.
1096 
1097  @see process()
1098 
1099  """
1100  return self.__directory
1101 
1102 
1103  def get_dendrogram(self):
1104  """!
1105  @brief Returns dendrogram of clusters.
1106  @details Dendrogram is created in following way: the density indices of all regions are calculated and sorted
1107  in decreasing order for each cluster during clustering process.
1108 
1109  @remark Dendrogram is returned only after data processing (method process()). Otherwise empty list is returned.
1110 
1111  """
1112  return self.__dendrogram
1113 
1114 
1116  """!
1117  @brief Returns clustering result representation type that indicate how clusters are encoded.
1118 
1119  @return (type_encoding) Clustering result representation.
1120 
1121  @see get_clusters()
1122 
1123  """
1124 
1125  return type_encoding.CLUSTER_INDEX_LIST_SEPARATION
1126 
1127 
1128  def __validate_arguments(self):
1129  """!
1130  @brief Check input arguments of BANG algorithm and if one of them is not correct then appropriate exception
1131  is thrown.
1132 
1133  """
1134  if self.__levels <= 0:
1135  raise ValueError("Incorrect amount of levels '%d'. Level value should be greater than 0." % self.__levels)
1136 
1137  if len(self.__data) == 0:
1138  raise ValueError("Empty input data. Data should contain at least one point.")
1139 
1140  if self.__density_threshold < 0:
1141  raise ValueError("Incorrect density threshold '%f'. Density threshold should not be negative." % self.__density_threshold)
1142 
1143 
1144  def __allocate_clusters(self):
1145  """!
1146  @brief Performs cluster allocation using leafs of tree in BANG directory (the smallest cells).
1147 
1148  """
1149  leaf_blocks = self.__directory.get_leafs()
1150  unhandled_block_indexes = set([i for i in range(len(leaf_blocks)) if leaf_blocks[i].get_density() > self.__density_threshold])
1151 
1152  current_block = self.__find_block_center(leaf_blocks, unhandled_block_indexes)
1153  cluster_index = 0
1154 
1155  while current_block is not None:
1156  if current_block.get_density() <= self.__density_threshold or len(current_block) <= self.__amount_threshold:
1157  break
1158 
1159  self.__expand_cluster_block(current_block, cluster_index, leaf_blocks, unhandled_block_indexes)
1160 
1161  current_block = self.__find_block_center(leaf_blocks, unhandled_block_indexes)
1162  cluster_index += 1
1163 
1164  self.__store_clustering_results(cluster_index, leaf_blocks)
1165 
1166 
1167  def __expand_cluster_block(self, block, cluster_index, leaf_blocks, unhandled_block_indexes):
1168  """!
1169  @brief Expand cluster from specific block that is considered as a central block.
1170 
1171  @param[in] block (bang_block): Block that is considered as a central block for cluster.
1172  @param[in] cluster_index (uint): Index of cluster that is assigned to blocks that forms new cluster.
1173  @param[in] leaf_blocks (list): Leaf BANG-blocks that are considered during cluster formation.
1174  @param[in] unhandled_block_indexes (set): Set of candidates (BANG block indexes) to become a cluster member. The
1175  parameter helps to reduce traversing among BANG-block providing only restricted set of block that
1176  should be considered.
1177 
1178  """
1179 
1180  block.set_cluster(cluster_index)
1181  self.__update_cluster_dendrogram(cluster_index, [block])
1182 
1183  neighbors = self.__find_block_neighbors(block, leaf_blocks, unhandled_block_indexes)
1184  self.__update_cluster_dendrogram(cluster_index, neighbors)
1185 
1186  for neighbor in neighbors:
1187  neighbor.set_cluster(cluster_index)
1188  neighbor_neighbors = self.__find_block_neighbors(neighbor, leaf_blocks, unhandled_block_indexes)
1189  self.__update_cluster_dendrogram(cluster_index, neighbor_neighbors)
1190 
1191  neighbors += neighbor_neighbors
1192 
1193 
1194  def __store_clustering_results(self, amount_clusters, leaf_blocks):
1195  """!
1196  @brief Stores clustering results in a convenient way.
1197 
1198  @param[in] amount_clusters (uint): Amount of cluster that was allocated during processing.
1199  @param[in] leaf_blocks (list): Leaf BANG-blocks (the smallest cells).
1200 
1201  """
1202  self.__clusters = [[] for _ in range(amount_clusters)]
1203  for block in leaf_blocks:
1204  index = block.get_cluster()
1205 
1206  if index is not None:
1207  self.__clusters[index] += block.get_points()
1208  else:
1209  self.__noise += block.get_points()
1210 
1211  self.__clusters = [ list(set(cluster)) for cluster in self.__clusters ]
1212  self.__noise = list(set(self.__noise))
1213 
1214 
1215  def __find_block_center(self, level_blocks, unhandled_block_indexes):
1216  """!
1217  @brief Search block that is cluster center for new cluster.
1218 
1219  @return (bang_block) Central block for new cluster, if cluster is not found then None value is returned.
1220 
1221  """
1222  for i in reversed(range(len(level_blocks))):
1223  if level_blocks[i].get_density() <= self.__density_threshold:
1224  return None
1225 
1226  if level_blocks[i].get_cluster() is None:
1227  unhandled_block_indexes.remove(i)
1228  return level_blocks[i]
1229 
1230  return None
1231 
1232 
1233  def __find_block_neighbors(self, block, level_blocks, unhandled_block_indexes):
1234  """!
1235  @brief Search block neighbors that are parts of new clusters (density is greater than threshold and that are
1236  not cluster members yet), other neighbors are ignored.
1237 
1238  @param[in] block (bang_block): BANG-block for which neighbors should be found (which can be part of cluster).
1239  @param[in] level_blocks (list): BANG-blocks on specific level.
1240  @param[in] unhandled_block_indexes (set): Blocks that have not been processed yet.
1241 
1242  @return (list) Block neighbors that can become part of cluster.
1243 
1244  """
1245  neighbors = []
1246 
1247  handled_block_indexes = []
1248  for unhandled_index in unhandled_block_indexes:
1249  if block.is_neighbor(level_blocks[unhandled_index]):
1250  handled_block_indexes.append(unhandled_index)
1251  neighbors.append(level_blocks[unhandled_index])
1252 
1253  # Maximum number of neighbors is eight
1254  if len(neighbors) == 8:
1255  break
1256 
1257  for handled_index in handled_block_indexes:
1258  unhandled_block_indexes.remove(handled_index)
1259 
1260  return neighbors
1261 
1262 
1263  def __update_cluster_dendrogram(self, index_cluster, blocks):
1264  """!
1265  @brief Append clustered blocks to dendrogram.
1266 
1267  @param[in] index_cluster (uint): Cluster index that was assigned to blocks.
1268  @param[in] blocks (list): Blocks that were clustered.
1269 
1270  """
1271  if len(self.__dendrogram) <= index_cluster:
1272  self.__dendrogram.append([])
1273 
1274  blocks = sorted(blocks, key=lambda block: block.get_density(), reverse=True)
1275  self.__dendrogram[index_cluster] += blocks
1276 
Common visualizer of clusters on 1D, 2D or 3D surface.
Definition: __init__.py:359
pyclustering module for cluster analysis.
Definition: __init__.py:1
def __update_cluster_dendrogram(self, index_cluster, blocks)
Append clustered blocks to dendrogram.
Definition: bang.py:1263
def get_clusters(self)
Returns allocated clusters.
Definition: bang.py:1059
def get_noise(self)
Returns allocated noise.
Definition: bang.py:1074
def set_cluster(self, index)
Assign cluster to the BANG-block by index.
Definition: bang.py:852
def __draw_clusters(self)
Display clusters and outliers using different colors.
Definition: bang.py:335
def __draw_block(self, block, block_alpha=0.0)
Display single BANG block on axis.
Definition: bang.py:299
def get_points(self)
Return points that covers by the BANG-block.
Definition: bang.py:839
def __calculate_volume(self)
Calculates volume of current spatial block.
Definition: bang.py:731
BANG-block that represent spatial region in data space.
Definition: bang.py:752
Utils that are used by modules of pyclustering.
Definition: __init__.py:1
Module for representing clustering results.
Definition: encoder.py:1
def is_neighbor(self, block)
Performs calculation to check whether specified block is neighbor to the current. ...
Definition: bang.py:862
def __expand_cluster_block(self, block, cluster_index, leaf_blocks, unhandled_block_indexes)
Expand cluster from specific block that is considered as a central block.
Definition: bang.py:1167
def get_height(self)
Returns height of BANG tree where blocks are stored.
Definition: bang.py:496
Colors used by pyclustering library for visualization.
Definition: color.py:1
def is_neighbor(self, block)
Performs calculation to identify whether specified block is neighbor of current block.
Definition: bang.py:687
Visualizer of BANG algorithm&#39;s results.
Definition: bang.py:48
def get_region(self)
Returns region number of BANG-block.
Definition: bang.py:796
def __init__(self, data, levels, kwargs)
Create BANG directory - basically tree structure with direct access to leafs.
Definition: bang.py:422
def __contains__(self, point)
Point is considered as contained if it lies in block (belong to it).
Definition: bang.py:631
def get_cluster_encoding(self)
Returns clustering result representation type that indicate how clusters are encoded.
Definition: bang.py:1115
def __get_amount_points(self)
Count covered points by the BANG-block and if cache is enable then covered points are stored...
Definition: bang.py:911
def __init__(self, data, region, level, space_block, cache_points=False)
Create BANG-block.
Definition: bang.py:757
def get_leafs(self)
Return leafs - the smallest blocks.
Definition: bang.py:472
Class implements BANG grid based clustering algorithm.
Definition: bang.py:955
def __create_directory(self)
Create BANG directory as a tree with separate storage for leafs.
Definition: bang.py:506
def __str__(self)
Returns string representation of BANG-block using region number and level where block is located...
Definition: bang.py:780
def __draw_cluster(self, data, cluster, color, marker)
Draw 2-D single cluster on axis using specified color and marker.
Definition: bang.py:348
def __find_block_center(self, level_blocks, unhandled_block_indexes)
Search block that is cluster center for new cluster.
Definition: bang.py:1215
def __allocate_clusters(self)
Performs cluster allocation using leafs of tree in BANG directory (the smallest cells).
Definition: bang.py:1144
def get_dendrogram(self)
Returns dendrogram of clusters.
Definition: bang.py:1103
Geometrical description of BANG block in data space.
Definition: bang.py:599
def __len__(self)
Returns block size defined by amount of points that are contained by this block.
Definition: bang.py:788
Provides service for creating 2-D animation using BANG clustering results.
Definition: bang.py:223
def get_corners(self)
Return spatial description of current block.
Definition: bang.py:645
def __split_block(self, block, split_dimension, cache_require, current_level_blocks)
Split specific block in specified dimension.
Definition: bang.py:577
def __increment_block(self)
Increment BANG block safely by updating block index, level and level block.
Definition: bang.py:285
def get_data(self)
Return data that is stored in the directory.
Definition: bang.py:462
def __str__(self)
Returns string block description.
Definition: bang.py:621
def get_directory(self)
Returns grid directory that describes grid of the processed data.
Definition: bang.py:1089
def show_blocks(directory)
Show BANG-blocks (leafs only) in data space.
Definition: bang.py:59
def __find_block_neighbors(self, block, level_blocks, unhandled_block_indexes)
Search block neighbors that are parts of new clusters (density is greater than threshold and that are...
Definition: bang.py:1233
def __calculate_density(self, amount_points)
Calculates BANG-block density.
Definition: bang.py:895
def __calculate_neighborhood(self, block_max_corner)
Calculates neighborhood score that defined whether blocks are neighbors.
Definition: bang.py:708
def __init__(self, data, levels, ccore=False, kwargs)
Create BANG clustering algorithm.
Definition: bang.py:1003
def __build_level(self, previous_level_blocks, level)
Build new level of directory.
Definition: bang.py:553
def __store_level_blocks(self, level_blocks)
Store level blocks if observing is enabled.
Definition: bang.py:525
def __init__(self, directory, clusters)
Creates BANG animator instance.
Definition: bang.py:253
def __cache_covered_data(self)
Cache covered data.
Definition: bang.py:927
def __validate_arguments(self)
Check correctness of input arguments and throw exception if incorrect is found.
Definition: bang.py:276
def get_volume(self)
Returns volume of current block.
Definition: bang.py:655
def split(self, dimension)
Split current block into two spatial blocks in specified dimension.
Definition: bang.py:667
def get_density(self)
Returns density of the BANG-block.
Definition: bang.py:808
def __build_directory_levels(self)
Build levels of direction if amount of level is greater than one.
Definition: bang.py:538
def animate(self, animation_velocity=75, movie_fps=25, movie_filename=None)
Animates clustering process that is performed by BANG algorithm.
Definition: bang.py:357
def show_clusters(data, clusters, noise=None)
Display BANG clustering results.
Definition: bang.py:119
def __init__(self, max_corner, min_corner)
Creates spatial block in data space.
Definition: bang.py:608
def __len__(self)
Returns amount of blocks that is stored in the directory.
Definition: bang.py:452
def __draw_leaf_density(self)
Display densities by filling blocks by appropriate colors.
Definition: bang.py:320
def split(self, split_dimension, cache_points)
Split BANG-block into two new blocks in specified dimension.
Definition: bang.py:874
def show_dendrogram(dendrogram)
Display dendrogram of BANG-blocks.
Definition: bang.py:89
def process(self)
Performs clustering process in line with rules of BANG clustering algorithm.
Definition: bang.py:1039
def __validate_arguments(self)
Check input arguments of BANG algorithm and if one of them is not correct then appropriate exception ...
Definition: bang.py:1128
BANG directory stores BANG-blocks that represents grid in data space.
Definition: bang.py:415
def get_level(self, level)
Returns BANG blocks on the specific level.
Definition: bang.py:484
def __cache_point(self, index)
Store index points.
Definition: bang.py:940
def __store_clustering_results(self, amount_clusters, leaf_blocks)
Stores clustering results in a convenient way.
Definition: bang.py:1194
def get_spatial_block(self)
Return spatial block - BANG-block description in data space.
Definition: bang.py:829
def get_cluster(self)
Return index of cluster to which the BANG-block belongs to.
Definition: bang.py:818