3 @brief Cluster analysis algorithm: BSAS (Basic Sequential Algorithmic Scheme).
4 @details Implementation based on paper @cite book::pattern_recognition::2009.
6 @authors Andrei Novikov (pyclustering@yandex.ru)
8 @copyright BSD-3-Clause
13 from pyclustering.core.wrapper
import ccore_library
14 from pyclustering.core.bsas_wrapper
import bsas
as bsas_wrapper
15 from pyclustering.core.metric_wrapper
import metric_wrapper
25 @brief Visualizer of BSAS algorithm's results.
26 @details BSAS visualizer provides visualization services that are specific for BSAS algorithm.
33 @brief Display BSAS clustering results.
35 @param[in] sample (list): Dataset that was used for clustering.
36 @param[in] clusters (array_like): Clusters that were allocated by the algorithm.
37 @param[in] representatives (array_like): Allocated representatives that correspond to the clusters.
38 @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'figure', 'display', 'offset').
40 <b>Keyword Args:</b><br>
41 - figure (figure): If 'None' then a new figure is created, otherwise the specified figure is used for visualization.
42 - display (bool): If 'True' then figure will be shown by the method, otherwise it should be shown manually using matplotlib function 'plt.show()'.
43 - offset (uint): Specify axes index on the figure where results should be drawn (only if argument 'figure' is specified).
45 @return (figure) Figure where clusters were drawn.
49 figure = kwargs.get(
'figure',
None)
50 display = kwargs.get(
'display',
True)
51 offset = kwargs.get(
'offset', 0)
54 visualizer.append_clusters(clusters, sample, canvas=offset)
56 for cluster_index
in range(len(clusters)):
57 visualizer.append_cluster_attribute(offset, cluster_index, [representatives[cluster_index]],
'*', 10)
59 return visualizer.show(figure=figure, display=display)
64 @brief Class represents BSAS clustering algorithm - basic sequential algorithmic scheme.
65 @details Algorithm has two mandatory parameters: maximum allowable number of clusters and threshold
66 of dissimilarity or in other words maximum distance between points. Distance metric also can
67 be specified using the 'metric' parameter, by default 'Manhattan' distance is used.
68 BSAS uses the following rule for updating a cluster representative:
71 \vec{m}_{C_{k}}^{new}=\frac{ \left ( n_{C_{k}^{new}} - 1 \right )\vec{m}_{C_{k}}^{old} + \vec{x} }{n_{C_{k}^{new}}}
74 Clustering results of this algorithm depend on the order of objects in the input data.
78 from pyclustering.cluster.bsas import bsas, bsas_visualizer
79 from pyclustering.utils import read_sample
80 from pyclustering.samples.definitions import SIMPLE_SAMPLES
82 # Read data sample from 'Simple02.data'.
83 sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE2)
85 # Prepare algorithm's parameters.
89 # Create instance of BSAS algorithm.
90 bsas_instance = bsas(sample, max_clusters, threshold)
91 bsas_instance.process()
93 # Get clustering results.
94 clusters = bsas_instance.get_clusters()
95 representatives = bsas_instance.get_representatives()
98 bsas_visualizer.show_clusters(sample, clusters, representatives)
101 @see pyclustering.cluster.mbsas, pyclustering.cluster.ttsas
105 def __init__(self, data, maximum_clusters, threshold, ccore=True, **kwargs):
107 @brief Creates classical BSAS algorithm.
109 @param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple.
110 @param[in] maximum_clusters: Maximum allowable number of clusters that can be allocated during processing.
111 @param[in] threshold: Threshold of dissimilarity (maximum distance) between points.
112 @param[in] ccore (bool): If True then CCORE (C++ part of the library) will be used for solving.
113 @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'metric').
115 <b>Keyword Args:</b><br>
116 - metric (distance_metric): Metric that is used for distance calculation between two points.
121 self.
_amount = maximum_clusters
124 self.
_ccore = ccore
and self.
_metric.get_type() != type_metric.USER_DEFINED
130 self.
_ccore = ccore_library.workable()
137 @brief Performs cluster analysis in line with rules of BSAS algorithm.
139 @return (bsas) Returns itself (BSAS instance).
141 @remark Results of clustering can be obtained using corresponding get methods.
144 @see get_representatives()
156 def __process_by_ccore(self):
157 ccore_metric = metric_wrapper.create_instance(self.
_metric)
161 def __prcess_by_python(self):
165 for i
in range(1, len(self.
_data)):
166 point = self.
_data[i]
179 @brief Returns list of allocated clusters, each cluster contains indexes of objects in list of data.
182 @see get_representatives()
190 @brief Returns list of representatives of allocated clusters.
201 @brief Returns clustering result representation type that indicates how clusters are encoded.
203 @return (type_encoding) Clustering result representation.
209 return type_encoding.CLUSTER_INDEX_LIST_SEPARATION
212 def _find_nearest_cluster(self, point):
214 @brief Find nearest cluster to the specified point.
216 @param[in] point (list): Point from dataset.
218 @return (uint, double) Index of nearest cluster and distance to it.
222 nearest_distance = float(
'inf')
226 if distance < nearest_distance:
227 index_cluster = index
228 nearest_distance = distance
230 return index_cluster, nearest_distance
233 def _update_representative(self, index_cluster, point):
235 @brief Update cluster representative in line with new cluster size and added point to it.
237 @param[in] index_cluster (uint): Index of cluster whose representative should be updated.
238 @param[in] point (list): Point that was added to cluster.
241 length = len(self.
_clusters[index_cluster])
244 for dimension
in range(len(rep)):
245 rep[dimension] = ( (length - 1) * rep[dimension] + point[dimension] ) / length
248 def _verify_arguments(self):
250 @brief Verify input parameters for the algorithm and throw exception in case of incorrectness.
253 if len(self.
_data) == 0:
254 raise ValueError(
"Input data is empty (size: '%d')." % len(self.
_data))
257 raise ValueError(
"Amount of cluster (current value: '%d') for allocation should be greater than 0." %
261 raise ValueError(
"Threshold of dissimilarity (current value: '%d') between points should be greater or "