bsas.py
1 """!
2 
3 @brief Cluster analysis algorithm: BSAS (Basic Sequential Algorithmic Scheme).
4 @details Implementation based on paper @cite book::pattern_recognition::2009.
5 
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2018
8 @copyright GNU Public License
9 
10 @cond GNU_PUBLIC_LICENSE
11  PyClustering is free software: you can redistribute it and/or modify
12  it under the terms of the GNU General Public License as published by
13  the Free Software Foundation, either version 3 of the License, or
14  (at your option) any later version.
15 
16  PyClustering is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  GNU General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23 @endcond
24 
25 """
26 
27 
28 from pyclustering.core.wrapper import ccore_library;
29 from pyclustering.core.bsas_wrapper import bsas as bsas_wrapper;
30 from pyclustering.core.metric_wrapper import metric_wrapper;
31 
32 from pyclustering.cluster import cluster_visualizer;
33 from pyclustering.cluster.encoder import type_encoding;
34 
35 from pyclustering.utils.metric import type_metric, distance_metric;
36 
37 
class bsas_visualizer:
    """!
    @brief Visualizer of BSAS algorithm's results.
    @details BSAS visualizer provides visualization services that are specific for BSAS algorithm.

    """

    @staticmethod
    def show_clusters(sample, clusters, representatives, **kwargs):
        """!
        @brief Display BSAS clustering results.

        @param[in] sample (list): Dataset that was used for clustering.
        @param[in] clusters (array_like): Clusters that were allocated by the algorithm.
        @param[in] representatives (array_like): Allocated representatives correspond to clusters
                    (one representative is expected for every cluster, in the same order).
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'figure', 'display', 'offset').

        <b>Keyword Args:</b><br>
            - figure (figure): If 'None' then a new figure is created, otherwise specified figure is used for visualization.
            - display (bool): If 'True' then figure will be shown by the method, otherwise it should be shown manually using matplotlib function 'plt.show()'.
            - offset (uint): Specify axes index on the figure where results should be drawn (only if argument 'figure' is specified).

        @return (figure) Figure where clusters were drawn.

        """

        figure = kwargs.get('figure', None)
        display = kwargs.get('display', True)
        offset = kwargs.get('offset', 0)

        visualizer = cluster_visualizer()
        visualizer.append_clusters(clusters, sample, canvas=offset)

        # Attach every representative to its own cluster on the chosen canvas.
        # NOTE(review): '*' and 10 are presumably the marker and its size - confirm against cluster_visualizer.
        for cluster_index in range(len(clusters)):
            visualizer.append_cluster_attribute(offset, cluster_index, [representatives[cluster_index]], '*', 10)

        return visualizer.show(figure=figure, display=display)
75 
76 
class bsas:
    """!
    @brief Class represents BSAS clustering algorithm - basic sequential algorithmic scheme.
    @details Algorithm has two mandatory parameters: maximum allowable number of clusters and threshold
              of dissimilarity or in other words maximum distance between points. Distance metric also can
              be specified using 'metric' parameters, by default 'Euclidean' distance is used.
              BSAS using following rule for updating cluster representative:

    \f[
    \vec{m}_{C_{k}}^{new}=\frac{ \left ( n_{C_{k}^{new}} - 1 \right )\vec{m}_{C_{k}}^{old} + \vec{x} }{n_{C_{k}^{new}}}
    \f]

    Clustering results of this algorithm depends on objects order in input data.

    Example:
    @code
        # Read data sample from 'Simple02.data'.
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE2);

        # Prepare algorithm's parameters.
        max_clusters = 2;
        threshold = 1.0;

        # Create instance of BSAS algorithm.
        bsas_instance = bsas(sample, max_clusters, threshold);
        bsas_instance.process();

        # Get clustering results.
        clusters = bsas_instance.get_clusters();
        representatives = bsas_instance.get_representatives();

        # Display results.
        bsas_visualizer.show_clusters(sample, clusters, representatives);
    @endcode

    @see pyclustering.cluster.mbsas, pyclustering.cluster.ttsas

    """

    def __init__(self, data, maximum_clusters, threshold, ccore=True, **kwargs):
        """!
        @brief Creates classical BSAS algorithm.

        @param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple.
        @param[in] maximum_clusters: Maximum allowable number of clusters that can be allocated during processing.
        @param[in] threshold: Threshold of dissimilarity (maximum distance) between points.
        @param[in] ccore (bool): If True than DLL CCORE (C++ solution) will be used for solving.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'metric').

        <b>Keyword Args:</b><br>
            - metric (distance_metric): Metric that is used for distance calculation between two points.

        """

        self._data = data
        self._amount = maximum_clusters
        self._threshold = threshold
        self._metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN))

        # A user-defined metric forces the Python implementation even when 'ccore' is requested.
        self._ccore = ccore and self._metric.get_type() != type_metric.USER_DEFINED

        self._clusters = []
        self._representatives = []

        # CCORE is used only if the library reports itself as workable.
        if self._ccore is True:
            self._ccore = ccore_library.workable()


    def process(self):
        """!
        @brief Performs cluster analysis in line with rules of BSAS algorithm.

        @remark Results of clustering can be obtained using corresponding get methods.

        @see get_clusters()
        @see get_representatives()

        """

        if self._ccore is True:
            self.__process_by_ccore()
        else:
            self.__prcess_by_python()


    def __process_by_ccore(self):
        """!
        @brief Performs cluster analysis using CCORE wrapper and stores returned clusters and representatives.

        """
        ccore_metric = metric_wrapper.create_instance(self._metric)
        self._clusters, self._representatives = bsas_wrapper(self._data, self._amount, self._threshold, ccore_metric.get_pointer())


    def __prcess_by_python(self):
        """!
        @brief Performs cluster analysis using pure Python implementation.
        @details Input data is never modified: representatives are stored as independent lists.
                  Previous results are discarded, therefore repeated calls produce the same output.
                  Empty input data leads to empty clustering results.

        """
        # Start from a clean state so that repeated processing does not accumulate results.
        self._clusters = []
        self._representatives = []

        if len(self._data) == 0:
            return

        # The first point always forms the first cluster; the copy keeps the dataset untouched.
        self._clusters.append([0])
        self._representatives.append(list(self._data[0]))

        for i in range(1, len(self._data)):
            point = self._data[i]
            index_cluster, distance = self._find_nearest_cluster(point)

            # A new cluster is allocated only when the point is too far and the limit is not reached yet.
            if (distance > self._threshold) and (len(self._clusters) < self._amount):
                self._representatives.append(list(point))
                self._clusters.append([i])
            else:
                self._clusters[index_cluster].append(i)
                self._update_representative(index_cluster, point)


    def get_clusters(self):
        """!
        @brief Returns list of allocated clusters, each cluster contains indexes of objects in list of data.

        @see process()
        @see get_representatives()

        """
        return self._clusters


    def get_representatives(self):
        """!
        @brief Returns list of representatives of allocated clusters.

        @see process()
        @see get_clusters()

        """
        return self._representatives


    def get_cluster_encoding(self):
        """!
        @brief Returns clustering result representation type that indicate how clusters are encoded.

        @return (type_encoding) Clustering result representation.

        @see get_clusters()

        """

        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION


    def _find_nearest_cluster(self, point):
        """!
        @brief Find nearest cluster to the specified point.

        @param[in] point (list): Point from dataset.

        @return (uint, double) Index of nearest cluster and distance to it. If there are no representatives
                 (or no distance is less than infinity) then index is -1 and distance is infinity.

        """
        index_cluster = -1
        nearest_distance = float('inf')

        # Strict comparison keeps the first representative in case of equal distances.
        for index, representative in enumerate(self._representatives):
            distance = self._metric(point, representative)
            if distance < nearest_distance:
                index_cluster = index
                nearest_distance = distance

        return index_cluster, nearest_distance


    def _update_representative(self, index_cluster, point):
        """!
        @brief Update cluster representative in line with new cluster size and added point to it.
        @details The point is expected to be already appended to the cluster, so the cluster length is the new size.
                  A new list is stored as the representative instead of changing the old one in place,
                  therefore neither input data nor tuple-based points are affected.

        @param[in] index_cluster (uint): Index of cluster whose representative should be updated.
        @param[in] point (list): Point that was added to cluster.

        """
        length = len(self._clusters[index_cluster])
        rep = self._representatives[index_cluster]

        self._representatives[index_cluster] = [
            ((length - 1) * value + point[dimension]) / length
            for dimension, value in enumerate(rep)
        ]
Common visualizer of clusters on 1D, 2D or 3D surface.
Definition: __init__.py:359
pyclustering module for cluster analysis.
Definition: __init__.py:1
def get_cluster_encoding(self)
Returns clustering result representation type that indicate how clusters are encoded.
Definition: bsas.py:204
Class represents BSAS clustering algorithm - basic sequential algorithmic scheme. ...
Definition: bsas.py:77
def get_representatives(self)
Returns list of representatives of allocated clusters.
Definition: bsas.py:193
def process(self)
Performs cluster analysis in line with rules of BSAS algorithm.
Definition: bsas.py:144
Module provides various distance metrics - abstraction of the notion of distance in a metric space...
Definition: metric.py:1
Module for representing clustering results.
Definition: encoder.py:1
Distance metric performs distance calculation between two points in line with encapsulated function...
Definition: metric.py:58
def __init__(self, data, maximum_clusters, threshold, ccore=True, **kwargs)
Creates classical BSAS algorithm.
Definition: bsas.py:116
def _find_nearest_cluster(self, point)
Find nearest cluster to the specified point.
Definition: bsas.py:217
def get_clusters(self)
Returns list of allocated clusters, each cluster contains indexes of objects in list of data...
Definition: bsas.py:182
def __prcess_by_python(self)
Definition: bsas.py:166
Visualizer of BSAS algorithm's results.
Definition: bsas.py:38
def show_clusters(sample, clusters, representatives, **kwargs)
Display BSAS clustering results.
Definition: bsas.py:46
def __process_by_ccore(self)
Definition: bsas.py:161
def _update_representative(self, index_cluster, point)
Update cluster representative in line with new cluster size and added point to it.
Definition: bsas.py:238