3 @brief Cluster analysis algorithm: CLARANS.
4 @details Implementation based on paper @cite article::clarans::1.
6 @authors Andrei Novikov (pyclustering@yandex.ru)
8 @copyright BSD-3-Clause
22 @brief Class represents clustering algorithm CLARANS (a method for clustering objects for spatial data mining).
26 def __init__(self, data, number_clusters, numlocal, maxneighbor):
28 @brief Constructor of clustering algorithm CLARANS.
29 @details The higher the value of maxneighbor, the closer is CLARANS to K-Medoids, and the longer is each search of a local minima.
31 @param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple.
32 @param[in] number_clusters (uint): Amount of clusters that should be allocated.
33 @param[in] numlocal (uint): The number of local minima obtained (amount of iterations for solving the problem).
34 @param[in] maxneighbor (uint): The maximum number of neighbors examined.
53 def __verify_arguments(self):
55 @brief Verify input parameters for the algorithm and throw exception in case of incorrectness.
59 raise ValueError(
"Input data is empty (size: '%d')." % len(self.
__pointer_data))
62 raise ValueError(
"Amount of cluster (current value: '%d') for allocation should be greater than 0." %
66 raise ValueError(
"Local minima (current value: '%d') should be greater or equal to 0." % self.
__numlocal)
69 raise ValueError(
"Maximum number of neighbors (current value: '%d') should be greater or "
75 @brief Performs cluster analysis in line with rules of CLARANS algorithm.
77 @return (clarans) Returns itself (CLARANS instance).
108 @brief Returns allocated clusters by the algorithm.
110 @remark Allocated clusters can be returned only after data processing (use method process()), otherwise empty list is returned.
112 @return (list) List of allocated clusters, each cluster contains indexes of objects in list of data.
124 @brief Returns list of medoids of allocated clusters.
136 @brief Returns clustering result representation type that indicate how clusters are encoded.
138 @return (type_encoding) Clustering result representation.
144 return type_encoding.CLUSTER_INDEX_LIST_SEPARATION
147 def __update_clusters(self, medoids):
149 @brief Forms cluster in line with specified medoids by calculation distance from each point to medoids.
154 self.
__clusters = [[]
for i
in range(len(medoids))]
159 for index
in range(len(medoids)):
162 if (dist < dist_optim)
or (index == 0):
166 self.
__clusters[index_optim].append(index_point)
167 self.
__belong[index_point] = index_optim
173 def __optimize_configuration(self):
175 @brief Finds quasi-optimal medoids and updates in line with them clusters in line with algorithm's rules.
182 current_medoid_cluster_index = self.
__belong[current_medoid_index]
185 candidate_medoid_index = random.randint(0, len(self.
__pointer_data) - 1)
187 while candidate_medoid_index
in self.
__current:
188 candidate_medoid_index = random.randint(0, len(self.
__pointer_data) - 1)
194 point_cluster_index = self.
__belong[point_index]
195 point_medoid_index = self.
__current[point_cluster_index]
199 other_medoid_cluster_index = self.
__belong[other_medoid_index]
209 distance_nearest = float(
'inf')
210 if ( (point_medoid_index != candidate_medoid_index)
and (point_medoid_index != current_medoid_cluster_index) ):
214 if (point_cluster_index == current_medoid_cluster_index):
216 if (distance_candidate >= distance_nearest):
217 candidate_cost += distance_nearest - distance_current
221 candidate_cost += distance_candidate - distance_current
223 elif (point_cluster_index == other_medoid_cluster_index):
225 if (distance_candidate > distance_nearest):
230 candidate_cost += distance_candidate - distance_nearest
232 if (candidate_cost < 0):
234 self.
__current[current_medoid_cluster_index] = candidate_medoid_index
246 def __find_another_nearest_medoid(self, point_index, current_medoid_index):
248 @brief Finds the another nearest medoid for the specified point that is differ from the specified medoid.
250 @param[in] point_index: index of point in dataspace for that searching of medoid in current list of medoids is perfomed.
251 @param[in] current_medoid_index: index of medoid that shouldn't be considered as a nearest.
253 @return (uint) index of the another nearest medoid for the point.
256 other_medoid_index = -1
257 other_distance_nearest = float(
'inf')
259 if (index_medoid != current_medoid_index):
262 if other_distance_candidate < other_distance_nearest:
263 other_distance_nearest = other_distance_candidate
264 other_medoid_index = index_medoid
266 return other_medoid_index
269 def __calculate_estimation(self):
271 @brief Calculates estimation (cost) of the current clusters. The lower the estimation,
272 the more optimally configuration of clusters.
274 @return (double) estimation of current clusters.
278 for index_cluster
in range(0, len(self.
__clusters)):
280 index_medoid = self.
__current[index_cluster]
281 for index_point
in cluster: