3 @brief Cluster analysis algorithm: BANG. 4 @details Implementation based on paper @cite inproceedings::bang::1. 6 @authors Andrei Novikov (pyclustering@yandex.ru) 8 @copyright GNU Public License 10 @cond GNU_PUBLIC_LICENSE 11 PyClustering is free software: you can redistribute it and/or modify 12 it under the terms of the GNU General Public License as published by 13 the Free Software Foundation, either version 3 of the License, or 14 (at your option) any later version. 16 PyClustering is distributed in the hope that it will be useful, 17 but WITHOUT ANY WARRANTY; without even the implied warranty of 18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 GNU General Public License for more details. 21 You should have received a copy of the GNU General Public License 22 along with this program. If not, see <http://www.gnu.org/licenses/>. 32 import matplotlib.gridspec
as gridspec
33 import matplotlib.pyplot
as plt
34 import matplotlib.patches
as patches
35 import matplotlib.animation
as animation
36 except Exception
as error_instance:
37 warnings.warn(
"Impossible to import matplotlib (please, install 'matplotlib'), pyclustering's visualization " 38 "functionality is not available (details: '%s')." % str(error_instance))
50 @brief Visualizer of BANG algorithm's results. 51 @details BANG visualizer provides visualization services that are specific for BANG algorithm. 55 __maximum_density_alpha = 0.6
61 @brief Show BANG-blocks (leafs only) in data space. 62 @details BANG-blocks represent the grid that was used for the clustering process. 64 @param[in] directory (bang_directory): Directory that was created by BANG algorithm during clustering process. 68 dimension = len(directory.get_data()[0])
72 amount_canvases = int(dimension * (dimension - 1) / 2)
75 grid_spec = gridspec.GridSpec(1, amount_canvases)
77 pairs = list(itertools.combinations(range(dimension), 2))
78 if len(pairs) == 0: pairs = [(0, 0)]
80 for index
in range(amount_canvases):
81 ax = figure.add_subplot(grid_spec[index])
82 bang_visualizer.__draw_blocks(ax, directory.get_leafs(), pairs[index])
83 bang_visualizer.__draw_two_dimension_data(ax, directory.get_data(), pairs[index])
91 @brief Display dendrogram of BANG-blocks. 93 @param[in] dendrogram (list): List representation of dendrogram of BANG-blocks. 95 @see bang.get_dendrogram() 99 axis = plt.subplot(1, 1, 1)
102 for index_cluster
in range(len(dendrogram)):
103 densities = [ block.get_density()
for block
in dendrogram[index_cluster] ]
104 xrange = range(current_position, current_position + len(densities))
106 axis.bar(xrange, densities, 1.0, linewidth=0.0, color=color_list.get_color(index_cluster))
108 current_position += len(densities)
110 axis.set_ylabel(
"density")
111 axis.set_xlabel(
"block")
112 axis.xaxis.set_ticklabels([])
114 plt.xlim([-0.5, current_position - 0.5])
121 @brief Display BANG clustering results. 123 @param[in] data (list): Dataset that was used for clustering. 124 @param[in] clusters (array_like): Clusters that were allocated by the algorithm. 125 @param[in] noise (array_like): Noise that was allocated by the algorithm. 129 visualizer.append_clusters(clusters, data)
130 visualizer.append_cluster(noise
or [], data, marker=
'x')
135 def __draw_two_dimension_data(ax, data, pair):
137 @brief Display data in two-dimensional canvas. 139 @param[in] ax (Axis): Canvas where data should be displayed. 140 @param[in] data (list): Data points that should be displayed. 141 @param[in] pair (tuple): Pair of dimension indexes. 144 ax.set_xlabel(
"x%d" % pair[0])
145 ax.set_ylabel(
"x%d" % pair[1])
149 ax.plot(point[pair[0]], point[pair[1]], color=
'red', marker=
'.')
151 ax.plot(point[pair[0]], 0, color=
'red', marker=
'.')
152 ax.yaxis.set_ticklabels([])
156 def __draw_blocks(ax, blocks, pair):
158 @brief Display BANG-blocks on specified figure. 160 @param[in] ax (Axis): Axis where bang-blocks should be displayed. 161 @param[in] blocks (list): List of blocks that should be displayed. 162 @param[in] pair (tuple): Pair of coordinate index that should be displayed. 167 density_scale = blocks[-1].get_density()
169 bang_visualizer.__draw_block(ax, pair, block, density_scale)
173 def __draw_block(ax, pair, block, density_scale):
175 @brief Display BANG-block on the specified ax. 177 @param[in] ax (Axis): Axis where block should be displayed. 178 @param[in] pair (tuple): Pair of coordinate index that should be displayed. 179 @param[in] block (bang_block): BANG-block that should be displayed. 180 @param[in] density_scale (double): Max density to display density of the block by appropriate tone. 183 max_corner, min_corner = bang_visualizer.__get_rectangle_description(block, pair)
185 belong_cluster = block.get_cluster()
is not None 187 if density_scale != 0.0:
188 density_scale = bang_visualizer.__maximum_density_alpha * block.get_density() / density_scale
190 face_color = matplotlib.colors.to_rgba(
'blue', alpha=density_scale)
191 edge_color = matplotlib.colors.to_rgba(
'black', alpha=1.0)
193 rect = patches.Rectangle(min_corner, max_corner[0] - min_corner[0], max_corner[1] - min_corner[1],
195 facecolor=face_color,
196 edgecolor=edge_color,
202 def __get_rectangle_description(block, pair):
204 @brief Create rectangle description for block in specific dimension. 206 @param[in] pair (tuple): Pair of coordinate index that should be displayed. 207 @param[in] block (bang_block): BANG-block that should be displayed 209 @return (tuple) Pair of corners that describes rectangle. 212 max_corner, min_corner = block.get_spatial_block().get_corners()
214 max_corner = [max_corner[pair[0]], max_corner[pair[1]]]
215 min_corner = [min_corner[pair[0]], min_corner[pair[1]]]
218 max_corner[1], min_corner[1] = 1.0, -1.0
220 return max_corner, min_corner
225 @brief Provides service for creating 2-D animation using BANG clustering results. 226 @details The animator does not support visualization of clustering process where non 2-dimensional data was used. 228 Code example of animation of BANG clustering process: 230 from pyclustering.cluster.bang import bang, bang_animator 231 from pyclustering.utils import read_sample 232 from pyclustering.samples.definitions import FCPS_SAMPLES 234 # Read two-dimensional data. 235 data = read_sample(FCPS_SAMPLES.SAMPLE_LSUN) 237 # Create instance of BANG algorithm. 238 bang_instance = bang(data, 9) 239 bang_instance.process() 241 # Obtain clustering results. 242 clusters = bang_instance.get_clusters() 243 noise = bang_instance.get_noise() 244 directory = bang_instance.get_directory() 246 # Create BANG animation using class 'bang_animator': 247 animator = bang_animator(directory, clusters) 255 @brief Creates BANG animator instance. 257 @param[in] directory (bang_directory): BANG directory that was formed during BANG clustering process. 258 @param[in] clusters (list): Allocated clusters during BANG clustering process. 276 def __validate_arguments(self):
278 @brief Check correctness of input arguments and throw an exception if an incorrect one is found. 282 raise ValueError(
"Impossible to animate BANG clustering process for non 2D data.")
285 def __increment_block(self):
287 @brief Increment BANG block safely by updating block index, level and level block. 299 def __draw_block(self, block, block_alpha=0.0):
301 @brief Display single BANG block on axis. 303 @param[in] block (bang_block): BANG block that should be displayed. 304 @param[in] block_alpha (double): Transparency level - value of alpha. 307 max_corner, min_corner = block.get_spatial_block().get_corners()
309 face_color = matplotlib.colors.to_rgba(
'blue', alpha=block_alpha)
310 edge_color = matplotlib.colors.to_rgba(
'black', alpha=1.0)
312 rect = patches.Rectangle(min_corner, max_corner[0] - min_corner[0], max_corner[1] - min_corner[1],
314 facecolor=face_color,
315 edgecolor=edge_color,
317 self.
__ax.add_patch(rect)
320 def __draw_leaf_density(self):
322 @brief Display densities by filling blocks by appropriate colors. 326 density_scale = leafs[-1].get_density()
328 if density_scale == 0.0: density_scale = 1.0
331 alpha = 0.8 * block.get_density() / density_scale
335 def __draw_clusters(self):
337 @brief Display clusters and outliers using different colors. 341 for index_cluster
in range(len(self.
__clusters)):
342 color = color_list.get_color(index_cluster)
348 def __draw_cluster(self, data, cluster, color, marker):
350 @brief Draw 2-D single cluster on axis using specified color and marker. 354 self.
__ax.plot(data[item][0], data[item][1], color=color, marker=marker)
357 def animate(self, animation_velocity=75, movie_fps=25, movie_filename=None):
359 @brief Animates clustering process that is performed by BANG algorithm. 361 @param[in] animation_velocity (uint): Interval between frames in milliseconds (for run-time animation only). 362 @param[in] movie_fps (uint): Defines frames per second (for rendering movie only). 363 @param[in] movie_filename (string): If it is specified then animation will be stored to file that is specified in this parameter. 369 self.
__figure.suptitle(
"BANG algorithm", fontsize=18, fontweight=
'bold')
372 self.
__ax.plot(point[0], point[1], color=
'red', marker=
'.')
374 return frame_generation(0)
377 def frame_generation(index_iteration):
393 self.
__figure.suptitle(
"BANG algorithm", fontsize=18, fontweight=
'bold')
403 cluster_animation = animation.FuncAnimation(self.
__figure, frame_generation, iterations,
404 interval=animation_velocity,
405 init_func=init_frame,
408 if movie_filename
is not None:
409 cluster_animation.save(movie_filename, writer =
'ffmpeg', fps = movie_fps, bitrate = 3500)
417 @brief BANG directory stores BANG-blocks that represents grid in data space. 418 @details The directory build BANG-blocks in binary tree manner. Leafs of the tree stored separately to provide 419 a direct access to the leafs that should be analysed. Leafs cache data-points. 424 @brief Create BANG directory - basically tree structure with direct access to leafs. 426 @param[in] data (list): Input data that is clustered. 427 @param[in] levels (uint): Height of the tree of blocks. 428 @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'observe'). 430 <b>Keyword Args:</b><br> 431 - observe (bool): If 'True' then blocks on each level are stored. 432 - density_threshold (double): The lowest level of density when contained data in bang-block is 433 considered as a noise and there is no need to split it till the last level. Be aware that this 434 parameter is used with 'amount_threshold' parameter. 435 - amount_threshold (uint): Amount of points in the block when it contained data in bang-block is 436 considered as a noise and there is no need to split it till the last level. 447 self.
__observe = kwargs.get(
'observe',
True)
454 @brief Returns amount of blocks that is stored in the directory 456 @return (uint) Amount of blocks in the BANG directory. 464 @brief Return data that is stored in the directory. 466 @return (list) List of points that represents stored data. 474 @brief Return leafs - the smallest blocks. 475 @details Some leafs can be bigger than others because splitting is not performed for blocks whose density is 478 @return (list) List of blocks that are leafs of BANG directory. 486 @brief Returns BANG blocks on the specific level. 488 @param[in] level (uint): Level of tree where BANG blocks are located. 490 @return (list) List of BANG blocks on the specific level. 498 @brief Returns height of BANG tree where blocks are stored. 500 @return (uint) Height of BANG tree. 506 def __create_directory(self):
508 @brief Create BANG directory as a tree with separate storage for leafs. 512 min_corner, max_corner = data_corners(self.
__data)
515 cache_require = (self.
__levels == 1)
525 def __store_level_blocks(self, level_blocks):
527 @brief Store level blocks if observing is enabled. 529 @param[in] level_blocks (list): Created blocks on a new level. 532 self.
__size += len(level_blocks)
538 def __build_directory_levels(self):
540 @brief Build levels of directory if the amount of levels is greater than one. 544 previous_level_blocks = [ self.
__root ]
546 for level
in range(1, self.
__levels):
547 previous_level_blocks = self.
__build_level(previous_level_blocks, level)
550 self.
__leafs = sorted(self.
__leafs, key=
lambda block: block.get_density())
553 def __build_level(self, previous_level_blocks, level):
555 @brief Build new level of directory. 557 @param[in] previous_level_blocks (list): BANG-blocks on the previous level. 558 @param[in] level (uint): Level number that should be built. 560 @return (list) New block on the specified level. 563 current_level_blocks = []
565 split_dimension = level % len(self.
__data[0])
566 cache_require = (level == self.
__levels - 1)
568 for block
in previous_level_blocks:
569 self.
__split_block(block, split_dimension, cache_require, current_level_blocks)
572 self.
__leafs += current_level_blocks
574 return current_level_blocks
577 def __split_block(self, block, split_dimension, cache_require, current_level_blocks):
579 @brief Split specific block in specified dimension. 580 @details Split is not performed for a block whose density is lower than the threshold value; such blocks are put to 583 @param[in] block (bang_block): BANG-block that should be split. 584 @param[in] split_dimension (uint): Dimension at which splitting should be performed. 585 @param[in] cache_require (bool): Defines when points in cache should be stored during density calculation. 586 @param[in|out] current_level_blocks (list): Block storage at the current level where new blocks should be added. 593 left, right = block.split(split_dimension, cache_require)
594 current_level_blocks.append(left)
595 current_level_blocks.append(right)
600 @brief Geometrical description of BANG block in data space. 601 @details Provides services related to spatial functionality and used by bang_block 609 @brief Creates spatial block in data space. 611 @param[in] max_corner (array_like): Maximum corner coordinates of the block. 612 @param[in] min_corner (array_like): Minimal corner coordinates of the block. 622 @brief Returns string block description. 624 @return String representation of the block. 632 @brief Point is considered as contained if it lies in block (belong to it). 634 @return (bool) True if point is in block, otherwise False. 637 for i
in range(len(point)):
646 @brief Return spatial description of current block. 648 @return (tuple) Pair of maximum and minimum corners (max_corner, min_corner). 656 @brief Returns volume of current block. 657 @details Block volume has an uncommon meaning here: for 1D it is the length of a line, for 2D it is the area of a rectangle, 658 for 3D it is the volume of a 3D figure, and for ND it is the volume of an ND figure. 660 @return (double) Volume of current block. 668 @brief Split current block into two spatial blocks in specified dimension. 670 @param[in] dimension (uint): Dimension where current block should be split. 672 @return (tuple) Pair of new split blocks from current block. 680 first_max_corner[dimension] = split_border
681 second_min_corner[dimension] = split_border
688 @brief Performs calculation to identify whether specified block is neighbor of current block. 689 @details It also considers diagonal blocks as neighbors. 691 @param[in] block (spatial_block): Another block that is checked for being a neighbor. 693 @return (bool) True if blocks are neighbors, False otherwise. 696 if block
is not self:
697 block_max_corner, _ = block.get_corners()
698 dimension = len(block_max_corner)
701 if neighborhood_score == dimension:
707 def __calculate_neighborhood(self, block_max_corner):
709 @brief Calculates neighborhood score that defines whether blocks are neighbors. 711 @param[in] block_max_corner (list): Maximum coordinates of other block. 713 @return (uint) Neighborhood score. 716 dimension = len(block_max_corner)
720 neighborhood_score = 0
721 for i
in range(dimension):
724 if diff <= length_edges[i] + length_edges[i] * 0.0001:
725 neighborhood_score += 1
727 return neighborhood_score
730 def __calculate_volume(self):
732 @brief Calculates volume of current spatial block. 733 @details If empty dimension is detected (where all points have the same value) then such dimension is ignored 734 during calculation of volume. 736 @return (double) Volume of current spatial block. 744 if side_length != 0.0:
745 if volume == 0.0: volume = side_length
746 else: volume *= side_length
753 @brief BANG-block that represent spatial region in data space. 756 def __init__(self, data, region, level, space_block, cache_points=False):
758 @brief Create BANG-block. 760 @param[in] data (list): List of points that are processed. 761 @param[in] region (uint): Region number - unique value on a level. 762 @param[in] level (uint): Level number where block is created. 763 @param[in] space_block (spatial_block): Spatial block description in data space. 764 @param[in] cache_points (bool): if True then points are stored in memory (used for leaf blocks). 781 @brief Returns string representation of BANG-block using region number and level where block is located. 789 @brief Returns block size defined by amount of points that are contained by this block. 797 @brief Returns region number of BANG-block. 798 @details Region number is unique among region numbers on a directory level. Pair of region number and level 799 is unique for the whole directory. 801 @return (uint) Region number. 809 @brief Returns density of the BANG-block. 811 @return (double) BANG-block density. 819 @brief Return index of cluster to which the BANG-block belongs. 820 @details Index of cluster may have None value if the block was not assigned to any cluster. 822 @return (uint) Index of cluster or None if the block does not belong to any cluster. 830 @brief Return spatial block - BANG-block description in data space. 832 @return (spatial_block) Spatial block of the BANG-block. 840 @brief Return points that are covered by the BANG-block. 842 @return (list) List of point indexes that are covered by the block. 853 @brief Assign cluster to the BANG-block by index. 855 @param[in] index (uint): Index cluster that is assigned to BANG-block. 863 @brief Performs calculation to check whether specified block is neighbor to the current. 865 @param[in] block (bang_block): Other BANG-block that should be checked for neighborhood. 867 @return (bool) True if blocks are neighbors, False if blocks are not neighbors. 873 def split(self, split_dimension, cache_points):
875 @brief Split BANG-block into two new blocks in specified dimension. 877 @param[in] split_dimension (uint): Dimension where block should be split. 878 @param[in] cache_points (bool): If True then covered points are cached. Used for leaf blocks. 880 @return (tuple) Pair of BANG-block that were formed from the current. 894 def __calculate_density(self, amount_points):
896 @brief Calculates BANG-block density. 898 @param[in] amount_points (uint): Amount of points in block. 900 @return (double) BANG-block density. 905 return amount_points / volume
910 def __get_amount_points(self):
912 @brief Count covered points by the BANG-block and if cache is enabled then covered points are stored. 914 @return (uint) Amount of covered points. 918 for index
in range(len(self.
__data)):
926 def __cache_covered_data(self):
928 @brief Cache covered data. 934 for index_point
in range(len(self.
__data)):
939 def __cache_point(self, index):
941 @brief Store index points. 943 @param[in] index (uint): Index point that should be stored. 956 @brief Class implements BANG grid based clustering algorithm. 957 @details BANG clustering algorithms uses a multidimensional grid structure to organize the value space surrounding 958 the pattern values. The patterns are grouped into blocks and clustered with respect to the blocks by 959 a topological neighbor search algorithm @cite inproceedings::bang::1. 961 Code example of BANG usage: 963 from pyclustering.cluster.bang import bang, bang_visualizer 964 from pyclustering.utils import read_sample 965 from pyclustering.samples.definitions import FCPS_SAMPLES 967 # Read data three dimensional data. 968 data = read_sample(FCPS_SAMPLES.SAMPLE_CHAINLINK) 970 # Prepare algorithm's parameters. 973 # Create instance of BANG algorithm. 974 bang_instance = bang(data, levels) 975 bang_instance.process() 977 # Obtain clustering results. 978 clusters = bang_instance.get_clusters() 979 noise = bang_instance.get_noise() 980 directory = bang_instance.get_directory() 981 dendrogram = bang_instance.get_dendrogram() 983 # Visualize BANG clustering results. 984 bang_visualizer.show_blocks(directory) 985 bang_visualizer.show_dendrogram(dendrogram) 986 bang_visualizer.show_clusters(data, clusters, noise) 989 There is visualization of BANG-clustering of three-dimensional data 'chainlink'. BANG-blocks that were formed during 990 processing are shown on following figure. The darkest color means highest density, blocks that does not cover points 992 @image html bang_blocks_chainlink.png "Fig. 1. BANG-blocks that cover input data." 994 Here is obtained dendrogram that can be used for further analysis to improve clustering results: 995 @image html bang_dendrogram_chainlink.png "Fig. 2. BANG dendrogram where the X-axis contains BANG-blocks, the Y-axis contains density." 997 BANG clustering result of 'chainlink' data: 998 @image html bang_clustering_chainlink.png "Fig. 3. BANG clustering result. 
Data: 'chainlink'." 1002 def __init__(self, data, levels, ccore=False, **kwargs):
1004 @brief Create BANG clustering algorithm. 1006 @param[in] data (list): Input data (list of points) that should be clustered. 1007 @param[in] levels (uint): Amount of levels in tree that is used for splitting (how many times block should be 1008 split). For example, if amount of levels is two then surface will be divided into two blocks and 1009 each obtained block will be divided into blocks also. 1010 @param[in] ccore (bool): Reserved positional argument - not used yet. 1011 @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'observe'). 1013 <b>Keyword Args:</b><br> 1014 - density_threshold (double): If block density is smaller than this value then contained data by this 1015 block is considered as a noise and its points as outliers. Block density is defined by amount of 1016 points in block divided by block volume: <i>amount_block_points</i>/<i>block_volume</i>. By default 1017 it is 0.0 - means that only empty blocks are considered as noise. Be aware that this parameter is used 1018 with parameter 'amount_threshold' - the maximum threshold is considered during processing. 1019 - amount_threshold (uint): Amount of points in the block when it contained data in bang-block is 1020 considered as a noise and there is no need to split it till the last level. Be aware that this parameter 1021 is used with parameter 'density_threshold' - the maximum threshold is considered during processing. 1040 @brief Performs clustering process in line with rules of BANG clustering algorithm. 1042 @return (bang) Returns itself (BANG instance). 1046 @see get_directory() 1047 @see get_dendrogram() 1060 @brief Returns allocated clusters. 1062 @remark Allocated clusters are returned only after data processing (method process()). Otherwise empty list is returned. 1064 @return (list) List of allocated clusters, each cluster contains indexes of objects in list of data. 1075 @brief Returns allocated noise. 
1077 @remark Allocated noise is returned only after data processing (method process()). Otherwise empty list is returned. 1079 @return (list) List of indexes that are marked as a noise. 1090 @brief Returns grid directory that describes grid of the processed data. 1092 @remark Grid directory is returned only after data processing (method process()). Otherwise None value is returned. 1094 @return (bang_directory) BANG directory that describes grid of process data. 1104 @brief Returns dendrogram of clusters. 1105 @details Dendrogram is created in following way: the density indices of all regions are calculated and sorted 1106 in decreasing order for each cluster during clustering process. 1108 @remark Dendrogram is returned only after data processing (method process()). Otherwise empty list is returned. 1116 @brief Returns clustering result representation type that indicate how clusters are encoded. 1118 @return (type_encoding) Clustering result representation. 1124 return type_encoding.CLUSTER_INDEX_LIST_SEPARATION
1127 def __validate_arguments(self):
1129 @brief Check input arguments of BANG algorithm and if one of them is not correct then appropriate exception 1133 if len(self.
__data) == 0:
1134 raise ValueError(
"Input data is empty (size: '%d')." % len(self.
__data))
1137 raise ValueError(
"Height of the tree should be greater than 0 (current value: '%d')." % self.
__levels)
1140 raise ValueError(
"Density threshold should be greater or equal to 0 (current value: '%d')." %
1144 raise ValueError(
"Amount of points threshold should be greater than 0 (current value: '%d')" %
1148 def __allocate_clusters(self):
1150 @brief Performs cluster allocation using leafs of tree in BANG directory (the smallest cells). 1154 unhandled_block_indexes = set([i
for i
in range(len(leaf_blocks))
if leaf_blocks[i].get_density() > self.
__density_threshold])
1159 while current_block
is not None:
1171 def __expand_cluster_block(self, block, cluster_index, leaf_blocks, unhandled_block_indexes):
1173 @brief Expand cluster from specific block that is considered as a central block. 1175 @param[in] block (bang_block): Block that is considered as a central block for cluster. 1176 @param[in] cluster_index (uint): Index of cluster that is assigned to blocks that forms new cluster. 1177 @param[in] leaf_blocks (list): Leaf BANG-blocks that are considered during cluster formation. 1178 @param[in] unhandled_block_indexes (set): Set of candidates (BANG block indexes) to become a cluster member. The 1179 parameter helps to reduce traversing among BANG-block providing only restricted set of block that 1180 should be considered. 1184 block.set_cluster(cluster_index)
1190 for neighbor
in neighbors:
1191 neighbor.set_cluster(cluster_index)
1195 neighbors += neighbor_neighbors
1198 def __store_clustering_results(self, amount_clusters, leaf_blocks):
1200 @brief Stores clustering results in a convenient way. 1202 @param[in] amount_clusters (uint): Amount of cluster that was allocated during processing. 1203 @param[in] leaf_blocks (list): Leaf BANG-blocks (the smallest cells). 1206 self.
__clusters = [[]
for _
in range(amount_clusters)]
1207 for block
in leaf_blocks:
1208 index = block.get_cluster()
1210 if index
is not None:
1213 self.
__noise += block.get_points()
1219 def __find_block_center(self, level_blocks, unhandled_block_indexes):
1221 @brief Search block that is cluster center for new cluster. 1223 @return (bang_block) Central block for new cluster, if cluster is not found then None value is returned. 1226 for i
in reversed(range(len(level_blocks))):
1230 if level_blocks[i].get_cluster()
is None:
1231 unhandled_block_indexes.remove(i)
1232 return level_blocks[i]
1237 def __find_block_neighbors(self, block, level_blocks, unhandled_block_indexes):
1239 @brief Search block neighbors that are parts of new clusters (density is greater than threshold and that are 1240 not cluster members yet), other neighbors are ignored. 1242 @param[in] block (bang_block): BANG-block for which neighbors should be found (which can be part of cluster). 1243 @param[in] level_blocks (list): BANG-blocks on specific level. 1244 @param[in] unhandled_block_indexes (set): Blocks that have not been processed yet. 1246 @return (list) Block neighbors that can become part of cluster. 1251 handled_block_indexes = []
1252 for unhandled_index
in unhandled_block_indexes:
1253 if block.is_neighbor(level_blocks[unhandled_index]):
1254 handled_block_indexes.append(unhandled_index)
1255 neighbors.append(level_blocks[unhandled_index])
1258 if len(neighbors) == 8:
1261 for handled_index
in handled_block_indexes:
1262 unhandled_block_indexes.remove(handled_index)
1267 def __update_cluster_dendrogram(self, index_cluster, blocks):
1269 @brief Append clustered blocks to dendrogram. 1271 @param[in] index_cluster (uint): Cluster index that was assigned to blocks. 1272 @param[in] blocks (list): Blocks that were clustered. 1278 blocks = sorted(blocks, key=
lambda block: block.get_density(), reverse=
True)
Common visualizer of clusters on 1D, 2D or 3D surface.
pyclustering module for cluster analysis.
def __update_cluster_dendrogram(self, index_cluster, blocks)
Append clustered blocks to dendrogram.
def get_clusters(self)
Returns allocated clusters.
def get_noise(self)
Returns allocated noise.
def set_cluster(self, index)
Assign cluster to the BANG-block by index.
def __draw_clusters(self)
Display clusters and outliers using different colors.
def __draw_block(self, block, block_alpha=0.0)
Display single BANG block on axis.
def get_points(self)
Return points that are covered by the BANG-block.
def __calculate_volume(self)
Calculates volume of current spatial block.
BANG-block that represent spatial region in data space.
Utils that are used by modules of pyclustering.
Module for representing clustering results.
def is_neighbor(self, block)
Performs calculation to check whether specified block is neighbor to the current. ...
def __expand_cluster_block(self, block, cluster_index, leaf_blocks, unhandled_block_indexes)
Expand cluster from specific block that is considered as a central block.
def get_height(self)
Returns height of BANG tree where blocks are stored.
Colors used by pyclustering library for visualization.
def is_neighbor(self, block)
Performs calculation to identify whether specified block is neighbor of current block.
Visualizer of BANG algorithm's results.
def get_region(self)
Returns region number of BANG-block.
def __init__(self, data, levels, kwargs)
Create BANG directory - basically tree structure with direct access to leafs.
def __contains__(self, point)
Point is considered as contained if it lies in block (belong to it).
def get_cluster_encoding(self)
Returns clustering result representation type that indicate how clusters are encoded.
def __get_amount_points(self)
Count covered points by the BANG-block and if cache is enable then covered points are stored...
def __init__(self, data, region, level, space_block, cache_points=False)
Create BANG-block.
def get_leafs(self)
Return leafs - the smallest blocks.
Class implements BANG grid based clustering algorithm.
def __create_directory(self)
Create BANG directory as a tree with separate storage for leafs.
def __str__(self)
Returns string representation of BANG-block using region number and level where block is located...
def __draw_cluster(self, data, cluster, color, marker)
Draw 2-D single cluster on axis using specified color and marker.
def __find_block_center(self, level_blocks, unhandled_block_indexes)
Search block that is cluster center for new cluster.
def __allocate_clusters(self)
Performs cluster allocation using leafs of tree in BANG directory (the smallest cells).
def get_dendrogram(self)
Returns dendrogram of clusters.
Geometrical description of BANG block in data space.
def __len__(self)
Returns block size defined by amount of points that are contained by this block.
Provides service for creating 2-D animation using BANG clustering results.
def get_corners(self)
Return spatial description of current block.
def __split_block(self, block, split_dimension, cache_require, current_level_blocks)
Split specific block in specified dimension.
def __increment_block(self)
Increment BANG block safely by updating block index, level and level block.
def get_data(self)
Return data that is stored in the directory.
def __str__(self)
Returns string block description.
def get_directory(self)
Returns grid directory that describes grid of the processed data.
def show_blocks(directory)
Show BANG-blocks (leafs only) in data space.
def __find_block_neighbors(self, block, level_blocks, unhandled_block_indexes)
Search block neighbors that are parts of new clusters (density is greater than threshold and that are...
def __calculate_density(self, amount_points)
Calculates BANG-block density.
def __calculate_neighborhood(self, block_max_corner)
Calculates neighborhood score that defines whether blocks are neighbors.
def __init__(self, data, levels, ccore=False, kwargs)
Create BANG clustering algorithm.
def __build_level(self, previous_level_blocks, level)
Build new level of directory.
def __store_level_blocks(self, level_blocks)
Store level blocks if observing is enabled.
def __init__(self, directory, clusters)
Creates BANG animator instance.
def __cache_covered_data(self)
Cache covered data.
def __validate_arguments(self)
Check correctness of input arguments and throw an exception if an incorrect one is found.
def get_volume(self)
Returns volume of current block.
def split(self, dimension)
Split current block into two spatial blocks in specified dimension.
def get_density(self)
Returns density of the BANG-block.
def __build_directory_levels(self)
Build levels of directory if the amount of levels is greater than one.
def animate(self, animation_velocity=75, movie_fps=25, movie_filename=None)
Animates clustering process that is performed by BANG algorithm.
def show_clusters(data, clusters, noise=None)
Display BANG clustering results.
def __init__(self, max_corner, min_corner)
Creates spatial block in data space.
def __len__(self)
Returns amount of blocks that is stored in the directory.
def __draw_leaf_density(self)
Display densities by filling blocks by appropriate colors.
def split(self, split_dimension, cache_points)
Split BANG-block into two new blocks in specified dimension.
def show_dendrogram(dendrogram)
Display dendrogram of BANG-blocks.
def process(self)
Performs clustering process in line with rules of BANG clustering algorithm.
def __validate_arguments(self)
Check input arguments of BANG algorithm and if one of them is not correct then appropriate exception ...
BANG directory stores BANG-blocks that represents grid in data space.
def get_level(self, level)
Returns BANG blocks on the specific level.
def __cache_point(self, index)
Store index points.
def __store_clustering_results(self, amount_clusters, leaf_blocks)
Stores clustering results in a convenient way.
def get_spatial_block(self)
Return spatial block - BANG-block description in data space.
def get_cluster(self)
Return index of cluster to which the BANG-block belongs to.