3 @brief Cluster analysis algorithm: BANG. 4 @details Implementation based on paper @cite inproceedings::bang::1. 6 @authors Andrei Novikov (pyclustering@yandex.ru) 8 @copyright GNU Public License 10 @cond GNU_PUBLIC_LICENSE 11 PyClustering is free software: you can redistribute it and/or modify 12 it under the terms of the GNU General Public License as published by 13 the Free Software Foundation, either version 3 of the License, or 14 (at your option) any later version. 16 PyClustering is distributed in the hope that it will be useful, 17 but WITHOUT ANY WARRANTY; without even the implied warranty of 18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 GNU General Public License for more details. 21 You should have received a copy of the GNU General Public License 22 along with this program. If not, see <http://www.gnu.org/licenses/>. 32 import matplotlib.gridspec
as gridspec
33 import matplotlib.pyplot
as plt
34 import matplotlib.patches
as patches
35 import matplotlib.animation
as animation
36 except Exception
as error_instance:
37 warnings.warn(
"Impossible to import matplotlib (please, install 'matplotlib'), pyclustering's visualization " 38 "functionality is not available (details: '%s')." % str(error_instance))
50 @brief Visualizer of BANG algorithm's results. 51 @details BANG visualizer provides visualization services that are specific for BANG algorithm. 56 __maximum_density_alpha = 0.6
62 @brief Show BANG-blocks (leafs only) in data space. 63 @details BANG-blocks represents grid that was used for clustering process. 65 @param[in] directory (bang_directory): Directory that was created by BANG algorithm during clustering process. 69 dimension = len(directory.get_data()[0])
73 amount_canvases = int(dimension * (dimension - 1) / 2)
76 grid_spec = gridspec.GridSpec(1, amount_canvases)
78 pairs = list(itertools.combinations(range(dimension), 2))
79 if len(pairs) == 0: pairs = [(0, 0)]
81 for index
in range(amount_canvases):
82 ax = figure.add_subplot(grid_spec[index])
83 bang_visualizer.__draw_blocks(ax, directory.get_leafs(), pairs[index])
84 bang_visualizer.__draw_two_dimension_data(ax, directory.get_data(), pairs[index])
92 @brief Display dendrogram of BANG-blocks. 94 @param[in] dendrogram (list): List representation of dendrogram of BANG-blocks. 96 @see bang.get_dendrogram() 100 axis = plt.subplot(1, 1, 1)
103 for index_cluster
in range(len(dendrogram)):
104 densities = [ block.get_density()
for block
in dendrogram[index_cluster] ]
105 xrange = range(current_position, current_position + len(densities))
107 axis.bar(xrange, densities, 1.0, linewidth=0.0, color=color_list.get_color(index_cluster))
109 current_position += len(densities)
111 axis.set_ylabel(
"density")
112 axis.set_xlabel(
"block")
113 axis.xaxis.set_ticklabels([])
115 plt.xlim([-0.5, current_position - 0.5])
122 @brief Display K-Means clustering results. 124 @param[in] data (list): Dataset that was used for clustering. 125 @param[in] clusters (array_like): Clusters that were allocated by the algorithm. 126 @param[in] noise (array_like): Noise that were allocated by the algorithm. 130 visualizer.append_clusters(clusters, data)
131 visualizer.append_cluster(noise
or [], data, marker=
'x')
136 def __draw_two_dimension_data(ax, data, pair):
138 @brief Display data in two-dimensional canvas. 140 @param[in] ax (Axis): Canvas where data should be displayed. 141 @param[in] data (list): Data points that should be displayed. 142 @param[in] pair (tuple): Pair of dimension indexes. 145 ax.set_xlabel(
"x%d" % pair[0])
146 ax.set_ylabel(
"x%d" % pair[1])
150 ax.plot(point[pair[0]], point[pair[1]], color=
'red', marker=
'.')
152 ax.plot(point[pair[0]], 0, color=
'red', marker=
'.')
153 ax.yaxis.set_ticklabels([])
157 def __draw_blocks(ax, blocks, pair):
159 @brief Display BANG-blocks on specified figure. 161 @param[in] ax (Axis): Axis where bang-blocks should be displayed. 162 @param[in] blocks (list): List of blocks that should be displyed. 163 @param[in] pair (tuple): Pair of coordinate index that should be displayed. 168 density_scale = blocks[-1].get_density()
170 bang_visualizer.__draw_block(ax, pair, block, density_scale)
174 def __draw_block(ax, pair, block, density_scale):
176 @brief Display BANG-block on the specified ax. 178 @param[in] ax (Axis): Axis where block should be displayed. 179 @param[in] pair (tuple): Pair of coordinate index that should be displayed. 180 @param[in] block (bang_block): BANG-block that should be displayed. 181 @param[in] density_scale (double): Max density to display density of the block by appropriate tone. 184 max_corner, min_corner = bang_visualizer.__get_rectangle_description(block, pair)
186 belong_cluster = block.get_cluster()
is not None 188 if density_scale != 0.0:
189 density_scale = bang_visualizer.__maximum_density_alpha * block.get_density() / density_scale
191 face_color = matplotlib.colors.to_rgba(
'blue', alpha=density_scale)
192 edge_color = matplotlib.colors.to_rgba(
'black', alpha=1.0)
194 rect = patches.Rectangle(min_corner, max_corner[0] - min_corner[0], max_corner[1] - min_corner[1],
196 facecolor=face_color,
197 edgecolor=edge_color,
203 def __get_rectangle_description(block, pair):
205 @brief Create rectangle description for block in specific dimension. 207 @param[in] pair (tuple): Pair of coordinate index that should be displayed. 208 @param[in] block (bang_block): BANG-block that should be displayed 210 @return (tuple) Pair of corners that describes rectangle. 213 max_corner, min_corner = block.get_spatial_block().get_corners()
215 max_corner = [max_corner[pair[0]], max_corner[pair[1]]]
216 min_corner = [min_corner[pair[0]], min_corner[pair[1]]]
219 max_corner[1], min_corner[1] = 1.0, -1.0
221 return max_corner, min_corner
226 @brief Provides service for creating 2-D animation using BANG clustering results. 227 @details The animator does not support visualization of clustering process where non 2-dimensional was used. 229 Code example of animation of BANG clustering process: 231 from pyclustering.cluster.bang import bang, bang_animator 232 from pyclustering.utils import read_sample 233 from pyclustering.samples.definitions import FCPS_SAMPLES 235 # Read data two dimensional data. 236 data = read_sample(FCPS_SAMPLES.SAMPLE_LSUN) 238 # Create instance of BANG algorithm. 239 bang_instance = bang(data, 9) 240 bang_instance.process() 242 # Obtain clustering results. 243 clusters = bang_instance.get_clusters() 244 noise = bang_instance.get_noise() 245 directory = bang_instance.get_directory() 247 # Create BANG animation using class 'bang_animator': 248 animator = bang_animator(directory, clusters, noise) 256 @brief Creates BANG animator instance. 258 @param[in] directory (bang_directory): BANG directory that was formed during BANG clustering process. 259 @param[in] clusters (list): Allocated clusters during BANG clustering process. 277 def __validate_arguments(self):
279 @brief Check correctness of input arguments and throw exception if incorrect is found. 283 raise ValueError(
"Impossible to animate BANG clustering process for non 2D data.")
286 def __increment_block(self):
288 @brief Increment BANG block safely by updating block index, level and level block. 300 def __draw_block(self, block, block_alpha=0.0):
302 @brief Display single BANG block on axis. 304 @param[in] block (bang_block): BANG block that should be displayed. 305 @param[in] block_alpha (double): Transparency level - value of alpha. 308 max_corner, min_corner = block.get_spatial_block().get_corners()
310 face_color = matplotlib.colors.to_rgba(
'blue', alpha=block_alpha)
311 edge_color = matplotlib.colors.to_rgba(
'black', alpha=1.0)
313 rect = patches.Rectangle(min_corner, max_corner[0] - min_corner[0], max_corner[1] - min_corner[1],
315 facecolor=face_color,
316 edgecolor=edge_color,
318 self.
__ax.add_patch(rect)
321 def __draw_leaf_density(self):
323 @brief Display densities by filling blocks by appropriate colors. 327 density_scale = leafs[-1].get_density()
329 if density_scale == 0.0: density_scale = 1.0
332 alpha = 0.8 * block.get_density() / density_scale
336 def __draw_clusters(self):
338 @brief Display clusters and outliers using different colors. 342 for index_cluster
in range(len(self.
__clusters)):
343 color = color_list.get_color(index_cluster)
349 def __draw_cluster(self, data, cluster, color, marker):
351 @brief Draw 2-D single cluster on axis using specified color and marker. 355 self.
__ax.plot(data[item][0], data[item][1], color=color, marker=marker)
358 def animate(self, animation_velocity=75, movie_fps=25, movie_filename=None):
360 @brief Animates clustering process that is performed by BANG algorithm. 362 @param[in] animation_velocity (uint): Interval between frames in milliseconds (for run-time animation only). 363 @param[in] movie_fps (uint): Defines frames per second (for rendering movie only). 364 @param[in] movie_filename (string): If it is specified then animation will be stored to file that is specified in this parameter. 370 self.
__figure.suptitle(
"BANG algorithm", fontsize=18, fontweight=
'bold')
373 self.
__ax.plot(point[0], point[1], color=
'red', marker=
'.')
375 return frame_generation(0)
378 def frame_generation(index_iteration):
394 self.
__figure.suptitle(
"BANG algorithm", fontsize=18, fontweight=
'bold')
404 cluster_animation = animation.FuncAnimation(self.
__figure, frame_generation, iterations,
405 interval=animation_velocity,
406 init_func=init_frame,
409 if movie_filename
is not None:
410 cluster_animation.save(movie_filename, writer =
'ffmpeg', fps = movie_fps, bitrate = 3500)
418 @brief BANG directory stores BANG-blocks that represents grid in data space. 419 @details The directory build BANG-blocks in binary tree manner. Leafs of the tree stored separately to provide 420 a direct access to the leafs that should be analysed. Leafs cache data-points. 425 @brief Create BANG directory - basically tree structure with direct access to leafs. 427 @param[in] data (list): Input data that is clustered. 428 @param[in] levels (uint): Height of the blocks tree. 429 @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'observe'). 431 <b>Keyword Args:</b><br> 432 - observe (bool): If 'True' then blocks on each level are stored. 433 - density_threshold (double): The lowest level of density when contained data in bang-block is 434 considered as a noise and there is no need to split it till the last level. Be aware that this 435 parameter is used with 'amount_threshold' parameter. 436 - amount_threshold (uint): Amount of points in the block when it contained data in bang-block is 437 considered as a noise and there is no need to split it till the last level. 448 self.
__observe = kwargs.get(
'observe',
True)
455 @brief Returns amount of blocks that is stored in the directory 457 @return (uint) Amount of blocks in the BANG directory. 465 @brief Return data that is stored in the directory. 467 @return (list) List of points that represents stored data. 475 @brief Return leafs - the smallest blocks. 476 @details Some leafs can be bigger than others because splitting is not performed for blocks whose density is 479 @return (list) List of blocks that are leafs of BANG directory. 487 @brief Returns BANG blocks on the specific level. 489 @param[in] level (uint): Level of tree where BANG blocks are located. 491 @return (list) List of BANG blocks on the specific level. 499 @brief Returns height of BANG tree where blocks are stored. 501 @return (uint) Height of BANG tree. 507 def __create_directory(self):
509 @brief Create BANG directory as a tree with separate storage for leafs. 513 min_corner, max_corner = data_corners(self.
__data)
516 cache_require = (self.
__levels == 1)
526 def __store_level_blocks(self, level_blocks):
528 @brief Store level blocks if observing is enabled. 530 @param[in] level_blocks (list): Created blocks on a new level. 533 self.
__size += len(level_blocks)
539 def __build_directory_levels(self):
541 @brief Build levels of direction if amount of level is greater than one. 545 previous_level_blocks = [ self.
__root ]
547 for level
in range(1, self.
__levels):
548 previous_level_blocks = self.
__build_level(previous_level_blocks, level)
551 self.
__leafs = sorted(self.
__leafs, key=
lambda block: block.get_density())
554 def __build_level(self, previous_level_blocks, level):
556 @brief Build new level of directory. 558 @param[in] previous_level_blocks (list): BANG-blocks on the previous level. 559 @param[in] level (uint): Level number that should be built. 561 @return (list) New block on the specified level. 564 current_level_blocks = []
566 split_dimension = level % len(self.
__data[0])
567 cache_require = (level == self.
__levels - 1)
569 for block
in previous_level_blocks:
570 self.
__split_block(block, split_dimension, cache_require, current_level_blocks)
573 self.
__leafs += current_level_blocks
575 return current_level_blocks
578 def __split_block(self, block, split_dimension, cache_require, current_level_blocks):
580 @brief Split specific block in specified dimension. 581 @details Split is not performed for block whose density is lower than threshold value, such blocks are putted to 584 @param[in] block (bang_block): BANG-block that should be split. 585 @param[in] split_dimension (uint): Dimension at which splitting should be performed. 586 @param[in] cache_require (bool): Defines when points in cache should be stored during density calculation. 587 @param[in|out] current_level_blocks (list): Block storage at the current level where new blocks should be added. 594 left, right = block.split(split_dimension, cache_require)
595 current_level_blocks.append(left)
596 current_level_blocks.append(right)
602 @brief Geometrical description of BANG block in data space. 603 @details Provides services related to spatial function and used by bang_block 611 @brief Creates spatial block in data space. 613 @param[in] max_corner (array_like): Maximum corner coordinates of the block. 614 @param[in] min_corner (array_like): Minimal corner coordinates of the block. 624 @brief Returns string block description. 626 @return String representation of the block. 634 @brief Point is considered as contained if it lies in block (belong to it). 636 @return (bool) True if point is in block, otherwise False. 639 for i
in range(len(point)):
648 @brief Return spatial description of current block. 650 @return (tuple) Pair of maximum and minimum corners (max_corner, min_corner). 658 @brief Returns volume of current block. 659 @details Volume block has uncommon mining here: for 1D is length of a line, for 2D is square of rectangle, 660 for 3D is volume of 3D figure, and for ND is volume of ND figure. 662 @return (double) Volume of current block. 670 @brief Split current block into two spatial blocks in specified dimension. 672 @param[in] dimension (uint): Dimension where current block should be split. 674 @return (tuple) Pair of new split blocks from current block. 682 first_max_corner[dimension] = split_border
683 second_min_corner[dimension] = split_border
690 @brief Performs calculation to identify whether specified block is neighbor of current block. 692 @param[in] block (spatial_block): Another block that is check whether it is neighbor. 694 @return (bool) True is blocks are neighbors, False otherwise. 697 if block
is not self:
698 block_max_corner, _ = block.get_corners()
699 dimension = len(block_max_corner)
702 if neighborhood_score == dimension:
708 def __calculate_neighborhood(self, block_max_corner):
710 @brief Calculates neighborhood score that defined whether blocks are neighbors. 712 @param[in] block_max_corner (list): Maximum coordinates of other block. 714 @return (uint) Neighborhood score. 717 dimension = len(block_max_corner)
721 neighborhood_score = 0
722 for i
in range(dimension):
725 if diff <= length_edges[i] + length_edges[i] * 0.0001:
726 neighborhood_score += 1
728 return neighborhood_score
731 def __calculate_volume(self):
733 @brief Calculates volume of current spatial block. 734 @details If empty dimension is detected (where all points has the same value) then such dimension is ignored 735 during calculation of volume. 737 @return (double) Volume of current spatial block. 745 if side_length != 0.0:
746 if volume == 0.0: volume = side_length
747 else: volume *= side_length
754 @brief BANG-block that represent spatial region in data space. 757 def __init__(self, data, region, level, space_block, cache_points=False):
759 @brief Create BANG-block. 761 @param[in] data (list): List of points that are processed. 762 @param[in] region (uint): Region number - unique value on a level. 763 @param[in] level (uint): Level number where block is created. 764 @param[in] space_block (spatial_block): Spatial block description in data space. 765 @param[in] cache_points (bool): if True then points are stored in memory (used for leaf blocks). 782 @brief Returns string representation of BANG-block using region number and level where block is located. 790 @brief Returns block size defined by amount of points that are contained by this block. 798 @brief Returns region number of BANG-block. 799 @details Region number is unique on among region numbers on a directory level. Pair of region number and level 800 is unique for all directory. 802 @return (uint) Region number. 810 @brief Returns density of the BANG-block. 812 @return (double) BANG-block density. 820 @brief Return index of cluster to which the BANG-block belongs to. 821 @details Index of cluster may have None value if the block was not assigned to any cluster. 823 @return (uint) Index of cluster or None if the block does not belong to any cluster. 831 @brief Return spatial block - BANG-block description in data space. 833 @return (spatial_block) Spatial block of the BANG-block. 841 @brief Return points that covers by the BANG-block. 843 @return (list) List of point indexes that are covered by the block. 854 @brief Assign cluster to the BANG-block by index. 856 @param[in] index (uint): Index cluster that is assigned to BANG-block. 864 @brief Performs calculation to check whether specified block is neighbor to the current. 866 @param[in] block (bang_block): Other BANG-block that should be checked for neighborhood. 868 @return (bool) True if blocks are neighbors, False if blocks are not neighbors. 874 def split(self, split_dimension, cache_points):
876 @brief Split BANG-block into two new blocks in specified dimension. 878 @param[in] split_dimension (uint): Dimension where block should be split. 879 @param[in] cache_points (bool): If True then covered points are cached. Used for leaf blocks. 881 @return (tuple) Pair of BANG-block that were formed from the current. 895 def __calculate_density(self, amount_points):
897 @brief Calculates BANG-block density. 899 @param[in] amount_points (uint): Amount of points in block. 901 @return (double) BANG-block density. 906 return amount_points / volume
911 def __get_amount_points(self):
913 @brief Count covered points by the BANG-block and if cache is enable then covered points are stored. 915 @return (uint) Amount of covered points. 919 for index
in range(len(self.
__data)):
927 def __cache_covered_data(self):
929 @brief Cache covered data. 935 for index_point
in range(len(self.
__data)):
940 def __cache_point(self, index):
942 @brief Store index points. 944 @param[in] index (uint): Index point that should be stored. 957 @brief Class implements BANG grid based clustering algorithm. 958 @details BANG clustering algorithms uses a multidimensional grid structure to organize the value space surrounding 959 the pattern values. The patterns are grouped into blocks and clustered with respect to the blocks by 960 a topological neighbor search algorithm @cite inproceedings::bang::1. 962 Code example of BANG usage: 964 from pyclustering.cluster.bang import bang, bang_visualizer 965 from pyclustering.utils import read_sample 966 from pyclustering.samples.definitions import FCPS_SAMPLES 968 # Read data three dimensional data. 969 data = read_sample(FCPS_SAMPLES.SAMPLE_CHAINLINK) 971 # Prepare algorithm's parameters. 974 # Create instance of BANG algorithm. 975 bang_instance = bang(data, levels) 976 bang_instance.process() 978 # Obtain clustering results. 979 clusters = bang_instance.get_clusters() 980 noise = bang_instance.get_noise() 981 directory = bang_instance.get_directory() 982 dendrogram = bang_instance.get_dendrogram() 984 # Visualize BANG clustering results. 985 bang_visualizer.show_blocks(directory) 986 bang_visualizer.show_dendrogram(dendrogram) 987 bang_visualizer.show_clusters(data, clusters, noise) 990 There is visualization of BANG-clustering of three-dimensional data 'chainlink'. BANG-blocks that were formed during 991 processing are shown on following figure. The darkest color means highest density, blocks that does not cover points 993 @image html bang_blocks_chainlink.png "Fig. 1. BANG-blocks that cover input data." 995 Here is obtained dendrogram that can be used for further analysis to improve clustering results: 996 @image html bang_dendrogram_chainlink.png "Fig. 2. BANG dendrogram where the X-axis contains BANG-blocks, the Y-axis contains density." 998 BANG clustering result of 'chainlink' data: 999 @image html bang_clustering_chainlink.png "Fig. 3. BANG clustering result. Data: 'chainlink'." 1003 def __init__(self, data, levels, ccore=False, **kwargs):
1005 @brief Create BANG clustering algorithm. 1007 @param[in] data (list): Input data (list of points) that should be clustered. 1008 @param[in] levels (uint): Amount of levels in tree that is used for splitting (how many times block should be 1009 split). For example, if amount of levels is two then surface will be divided into two blocks and 1010 each obtained block will be divided into blocks also. 1011 @param[in] ccore (bool): Reserved positional argument - not used yet. 1012 @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'observe'). 1014 <b>Keyword Args:</b><br> 1015 - density_threshold (double): If block density is smaller than this value then contained data by this 1016 block is considered as a noise and its points as outliers. Block density is defined by amount of 1017 points in block divided by block volume: <i>amount_block_points</i>/<i>block_volume</i>. By default 1018 it is 0.0 - means than only empty blocks are considered as noise. Be aware that this parameter is used 1019 with parameter 'amount_threshold' - the maximum threshold is considered during processing. 1020 - amount_threshold (uint): Amount of points in the block when it contained data in bang-block is 1021 considered as a noise and there is no need to split it till the last level. Be aware that this parameter 1022 is used with parameter 'density_threshold' - the maximum threshold is considered during processing. 1041 @brief Performs clustering process in line with rules of BANG clustering algorithm. 1045 @see get_directory() 1046 @see get_dendrogram() 1057 @brief Returns allocated clusters. 1059 @remark Allocated clusters are returned only after data processing (method process()). Otherwise empty list is returned. 1061 @return (list) List of allocated clusters, each cluster contains indexes of objects in list of data. 1072 @brief Returns allocated noise. 1074 @remark Allocated noise is returned only after data processing (method process()). Otherwise empty list is returned. 1076 @return (list) List of indexes that are marked as a noise. 1087 @brief Returns grid directory that describes grid of the processed data. 1089 @remark Grid directory is returned only after data processing (method process()). Otherwise None value is returned. 1091 @return (bang_directory) BANG directory that describes grid of process data. 1101 @brief Returns dendrogram of clusters. 1102 @details Dendrogram is created in following way: the density indices of all regions are calculated and sorted 1103 in decreasing order for each cluster during clustering process. 1105 @remark Dendrogram is returned only after data processing (method process()). Otherwise empty list is returned. 1113 @brief Returns clustering result representation type that indicate how clusters are encoded. 1115 @return (type_encoding) Clustering result representation. 1121 return type_encoding.CLUSTER_INDEX_LIST_SEPARATION
1124 def __validate_arguments(self):
1126 @brief Check input arguments of BANG algorithm and if one of them is not correct then appropriate exception 1131 raise ValueError(
"Incorrect amount of levels '%d'. Level value should be greater than 0." % self.
__levels)
1133 if len(self.
__data) == 0:
1134 raise ValueError(
"Empty input data. Data should contain at least one point.")
1137 raise ValueError(
"Incorrect density threshold '%f'. Density threshold should not be negative." % self.
__density_threshold)
1140 def __allocate_clusters(self):
1142 @brief Performs cluster allocation using leafs of tree in BANG directory (the smallest cells). 1146 unhandled_block_indexes = set([i
for i
in range(len(leaf_blocks))
if leaf_blocks[i].get_density() > self.
__density_threshold])
1151 while current_block
is not None:
1163 def __expand_cluster_block(self, block, cluster_index, leaf_blocks, unhandled_block_indexes):
1165 @brief Expand cluster from specific block that is considered as a central block. 1167 @param[in] block (bang_block): Block that is considered as a central block for cluster. 1168 @param[in] cluster_index (uint): Index of cluster that is assigned to blocks that forms new cluster. 1169 @param[in] leaf_blocks (list): Leaf BANG-blocks that are considered during cluster formation. 1170 @param[in] unhandled_block_indexes (set): Set of candidates (BANG block indexes) to become a cluster member. The 1171 parameter helps to reduce traversing among BANG-block providing only restricted set of block that 1172 should be considered. 1176 block.set_cluster(cluster_index)
1182 for neighbor
in neighbors:
1183 neighbor.set_cluster(cluster_index)
1187 neighbors += neighbor_neighbors
1190 def __store_clustering_results(self, amount_clusters, leaf_blocks):
1192 @brief Stores clustering results in a convenient way. 1194 @param[in] amount_clusters (uint): Amount of cluster that was allocated during processing. 1195 @param[in] leaf_blocks (list): Leaf BANG-blocks (the smallest cells). 1198 self.
__clusters = [[]
for _
in range(amount_clusters)]
1199 for block
in leaf_blocks:
1200 index = block.get_cluster()
1202 if index
is not None:
1205 self.
__noise += block.get_points()
1211 def __find_block_center(self, level_blocks, unhandled_block_indexes):
1213 @brief Search block that is cluster center for new cluster. 1215 @return (bang_block) Central block for new cluster, if cluster is not found then None value is returned. 1218 for i
in reversed(range(len(level_blocks))):
1222 if level_blocks[i].get_cluster()
is None:
1223 unhandled_block_indexes.remove(i)
1224 return level_blocks[i]
1229 def __find_block_neighbors(self, block, level_blocks, unhandled_block_indexes):
1231 @brief Search block neighbors that are parts of new clusters (density is greater than threshold and that are 1232 not cluster members yet), other neighbors are ignored. 1234 @param[in] block (bang_block): BANG-block for which neighbors should be found (which can be part of cluster). 1235 @param[in] level_blocks (list): BANG-blocks on specific level. 1236 @param[in] unhandled_block_indexes (set): Blocks that have not been processed yet. 1238 @return (list) Block neighbors that can become part of cluster. 1243 handled_block_indexes = []
1244 for unhandled_index
in unhandled_block_indexes:
1245 if block.is_neighbor(level_blocks[unhandled_index]):
1246 handled_block_indexes.append(unhandled_index)
1247 neighbors.append(level_blocks[unhandled_index])
1250 if len(neighbors) == 8:
1253 for handled_index
in handled_block_indexes:
1254 unhandled_block_indexes.remove(handled_index)
1259 def __update_cluster_dendrogram(self, index_cluster, blocks):
1261 @brief Append clustered blocks to dendrogram. 1263 @param[in] index_cluster (uint): Cluster index that was assigned to blocks. 1264 @param[in] blocks (list): Blocks that were clustered. 1270 blocks = sorted(blocks, key=
lambda block: block.get_density(), reverse=
True)
Common visualizer of clusters on 1D, 2D or 3D surface.
pyclustering module for cluster analysis.
def __update_cluster_dendrogram(self, index_cluster, blocks)
Append clustered blocks to dendrogram.
def get_clusters(self)
Returns allocated clusters.
def get_noise(self)
Returns allocated noise.
def set_cluster(self, index)
Assign cluster to the BANG-block by index.
def __draw_clusters(self)
Display clusters and outliers using different colors.
def __draw_block(self, block, block_alpha=0.0)
Display single BANG block on axis.
def get_points(self)
Return points that covers by the BANG-block.
def __calculate_volume(self)
Calculates volume of current spatial block.
BANG-block that represent spatial region in data space.
Utils that are used by modules of pyclustering.
Module for representing clustering results.
def is_neighbor(self, block)
Performs calculation to check whether specified block is neighbor to the current. ...
def __expand_cluster_block(self, block, cluster_index, leaf_blocks, unhandled_block_indexes)
Expand cluster from specific block that is considered as a central block.
def get_height(self)
Returns height of BANG tree where blocks are stored.
Colors used by pyclustering library for visualization.
def is_neighbor(self, block)
Performs calculation to identify whether specified block is neighbor of current block.
Visualizer of BANG algorithm's results.
def get_region(self)
Returns region number of BANG-block.
def __init__(self, data, levels, kwargs)
Create BANG directory - basically tree structure with direct access to leafs.
def __contains__(self, point)
Point is considered as contained if it lies in block (belong to it).
def get_cluster_encoding(self)
Returns clustering result representation type that indicate how clusters are encoded.
def __get_amount_points(self)
Count covered points by the BANG-block and if cache is enable then covered points are stored...
def __init__(self, data, region, level, space_block, cache_points=False)
Create BANG-block.
def get_leafs(self)
Return leafs - the smallest blocks.
Class implements BANG grid based clustering algorithm.
def __create_directory(self)
Create BANG directory as a tree with separate storage for leafs.
def __str__(self)
Returns string representation of BANG-block using region number and level where block is located...
def __draw_cluster(self, data, cluster, color, marker)
Draw 2-D single cluster on axis using specified color and marker.
def __find_block_center(self, level_blocks, unhandled_block_indexes)
Search block that is cluster center for new cluster.
def __allocate_clusters(self)
Performs cluster allocation using leafs of tree in BANG directory (the smallest cells).
def get_dendrogram(self)
Returns dendrogram of clusters.
Geometrical description of BANG block in data space.
def __len__(self)
Returns block size defined by amount of points that are contained by this block.
Provides service for creating 2-D animation using BANG clustering results.
def get_corners(self)
Return spatial description of current block.
def __split_block(self, block, split_dimension, cache_require, current_level_blocks)
Split specific block in specified dimension.
def __increment_block(self)
Increment BANG block safely by updating block index, level and level block.
def get_data(self)
Return data that is stored in the directory.
def __str__(self)
Returns string block description.
def get_directory(self)
Returns grid directory that describes grid of the processed data.
def show_blocks(directory)
Show BANG-blocks (leafs only) in data space.
def __find_block_neighbors(self, block, level_blocks, unhandled_block_indexes)
Search block neighbors that are parts of new clusters (density is greater than threshold and that are...
def __calculate_density(self, amount_points)
Calculates BANG-block density.
def __calculate_neighborhood(self, block_max_corner)
Calculates neighborhood score that defined whether blocks are neighbors.
def __init__(self, data, levels, ccore=False, kwargs)
Create BANG clustering algorithm.
def __build_level(self, previous_level_blocks, level)
Build new level of directory.
def __store_level_blocks(self, level_blocks)
Store level blocks if observing is enabled.
def __init__(self, directory, clusters)
Creates BANG animator instance.
def __cache_covered_data(self)
Cache covered data.
def __validate_arguments(self)
Check correctness of input arguments and throw exception if incorrect is found.
def get_volume(self)
Returns volume of current block.
def split(self, dimension)
Split current block into two spatial blocks in specified dimension.
def get_density(self)
Returns density of the BANG-block.
def __build_directory_levels(self)
Build levels of direction if amount of level is greater than one.
def animate(self, animation_velocity=75, movie_fps=25, movie_filename=None)
Animates clustering process that is performed by BANG algorithm.
def show_clusters(data, clusters, noise=None)
Display K-Means clustering results.
def __init__(self, max_corner, min_corner)
Creates spatial block in data space.
def __len__(self)
Returns amount of blocks that is stored in the directory.
def __draw_leaf_density(self)
Display densities by filling blocks by appropriate colors.
def split(self, split_dimension, cache_points)
Split BANG-block into two new blocks in specified dimension.
def show_dendrogram(dendrogram)
Display dendrogram of BANG-blocks.
def process(self)
Performs clustering process in line with rules of BANG clustering algorithm.
def __validate_arguments(self)
Check input arguments of BANG algorithm and if one of them is not correct then appropriate exception ...
BANG directory stores BANG-blocks that represents grid in data space.
def get_level(self, level)
Returns BANG blocks on the specific level.
def __cache_point(self, index)
Store index points.
def __store_clustering_results(self, amount_clusters, leaf_blocks)
Stores clustering results in a convenient way.
def get_spatial_block(self)
Return spatial block - BANG-block description in data space.
def get_cluster(self)
Return index of cluster to which the BANG-block belongs to.