3 @brief Cluster analysis algorithm: ROCK
4 @details Implementation based on paper @cite inproceedings::rock::1.
6 @authors Andrei Novikov (pyclustering@yandex.ru)
8 @copyright BSD-3-Clause
17 from pyclustering.core.wrapper
import ccore_library
19 import pyclustering.core.rock_wrapper
as wrapper
24 @brief The class represents clustering algorithm ROCK.
28 from pyclustering.cluster import cluster_visualizer
29 from pyclustering.cluster.rock import rock
30 from pyclustering.samples.definitions import FCPS_SAMPLES
31 from pyclustering.utils import read_sample
33 # Read sample for clustering from file.
34 sample = read_sample(FCPS_SAMPLES.SAMPLE_HEPTA)
36 # Create instance of ROCK algorithm for cluster analysis. Seven clusters should be allocated.
37 rock_instance = rock(sample, 1.0, 7)
39 # Run cluster analysis.
40 rock_instance.process()
42 # Obtain results of clustering.
43 clusters = rock_instance.get_clusters()
45 # Visualize clustering results.
46 visualizer = cluster_visualizer()
47 visualizer.append_clusters(clusters, sample)
53 def __init__(self, data, eps, number_clusters, threshold=0.5, ccore=True):
55 @brief Constructor of clustering algorithm ROCK.
57 @param[in] data (list): Input data - list of points where each point is represented by list of coordinates.
58 @param[in] eps (double): Connectivity radius (similarity threshold), points are neighbors if distance between them is less than connectivity radius.
59 @param[in] number_clusters (uint): Defines number of clusters that should be allocated from the input data set.
60 @param[in] threshold (double): Value that defines degree of normalization that influences on choice of clusters for merging during processing.
61 @param[in] ccore (bool): Defines should be CCORE (C++ pyclustering library) used instead of Python code or not.
74 self.
__ccore = ccore_library.workable()
86 @brief Performs cluster analysis in line with rules of ROCK algorithm.
88 @return (rock) Returns itself (ROCK instance).
106 if indexes != [-1, -1]:
116 @brief Returns list of allocated clusters, each cluster contains indexes of objects in list of data.
118 @return (list) List of allocated clusters, each cluster contains indexes of objects in list of data.
129 @brief Returns clustering result representation type that indicate how clusters are encoded.
131 @return (type_encoding) Clustering result representation.
137 return type_encoding.CLUSTER_INDEX_LIST_SEPARATION
140 def __find_pair_clusters(self, clusters):
142 @brief Returns pair of clusters that are best candidates for merging in line with goodness measure.
143 The pair of clusters for which the above goodness measure is maximum is the best pair of clusters to be merged.
145 @param[in] clusters (list): List of clusters that have been allocated during processing, each cluster is represented by list of indexes of points from the input data set.
147 @return (list) List that contains two indexes of clusters (from list 'clusters') that should be merged on this step.
148 It can be equals to [-1, -1] when no links between clusters.
152 maximum_goodness = 0.0
153 cluster_indexes = [-1, -1]
155 for i
in range(0, len(clusters)):
156 for j
in range(i + 1, len(clusters)):
158 if goodness > maximum_goodness:
159 maximum_goodness = goodness
160 cluster_indexes = [i, j]
162 return cluster_indexes
165 def __calculate_links(self, cluster1, cluster2):
167 @brief Returns number of link between two clusters.
168 @details Link between objects (points) exists only if distance between them less than connectivity radius.
170 @param[in] cluster1 (list): The first cluster.
171 @param[in] cluster2 (list): The second cluster.
173 @return (uint) Number of links between two clusters.
179 for index1
in cluster1:
180 for index2
in cluster2:
186 def __create_adjacency_matrix(self):
188 @brief Creates 2D adjacency matrix (list of lists) where each element described existence of link between points (means that points are neighbors).
195 for i
in range(0, size_data):
196 for j
in range(i + 1, size_data):
198 if (distance <= self.
__eps):
204 def __calculate_goodness(self, cluster1, cluster2):
206 @brief Calculates coefficient 'goodness measurement' between two clusters. The coefficient defines level of suitability of clusters for merging.
208 @param[in] cluster1 (list): The first cluster.
209 @param[in] cluster2 (list): The second cluster.
211 @return Goodness measure between two clusters.
218 return number_links / devider
221 def __verify_arguments(self):
223 @brief Verify input parameters for the algorithm and throw exception in case of incorrectness.
227 raise ValueError(
"Input data is empty (size: '%d')." % len(self.
__pointer_data))
230 raise ValueError(
"Connectivity radius (current value: '%d') should be greater or equal to 0." % self.
__eps)
233 raise ValueError(
"Threshold (current value: '%d') should be in range (0, 1)." % self.
__threshold)
236 raise ValueError(
"Amount of clusters (current value: '%d') should be greater than 0." %