mbsas.py
1 """!
2 
3 @brief Cluster analysis algorithm: MBSAS (Modified Basic Sequential Algorithmic Scheme).
4 @details Implementation based on paper @cite book::pattern_recognition::2009.
5 
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2018
8 @copyright GNU Public License
9 
10 @cond GNU_PUBLIC_LICENSE
11  PyClustering is free software: you can redistribute it and/or modify
12  it under the terms of the GNU General Public License as published by
13  the Free Software Foundation, either version 3 of the License, or
14  (at your option) any later version.
15 
16  PyClustering is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  GNU General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23 @endcond
24 
25 """
26 
27 
28 from pyclustering.core.mbsas_wrapper import mbsas as mbsas_wrapper
29 from pyclustering.core.metric_wrapper import metric_wrapper
30 
31 from pyclustering.cluster.bsas import bsas
32 
33 
class mbsas(bsas):
    """!
    @brief Class represents MBSAS (Modified Basic Sequential Algorithmic Scheme).
    @details Interface of MBSAS algorithm is the same as for BSAS. This algorithm performs clustering in two steps.
              The first step determines the amount of clusters (and their initial representatives). The second
              step assigns the points that were not chosen as cluster representatives to the clusters.

    Code example of MBSAS usage:
    @code
        # Read data sample from 'Simple02.data'.
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE2)

        # Prepare algorithm's parameters.
        max_clusters = 2
        threshold = 1.0

        # Create instance of MBSAS algorithm.
        mbsas_instance = mbsas(sample, max_clusters, threshold)
        mbsas_instance.process()

        # Get clustering results.
        clusters = mbsas_instance.get_clusters()
        representatives = mbsas_instance.get_representatives()

        # Display results.
        bsas_visualizer.show_clusters(sample, clusters, representatives)
    @endcode

    @see pyclustering.cluster.bsas, pyclustering.cluster.ttsas

    """

    def __init__(self, data, maximum_clusters, threshold, ccore=True, **kwargs):
        """!
        @brief Creates MBSAS algorithm.

        @param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple.
        @param[in] maximum_clusters: Maximum allowable number of clusters that can be allocated during processing.
        @param[in] threshold: Threshold of dissimilarity (maximum distance) between points.
        @param[in] ccore (bool): If True then DLL CCORE (C++ solution) will be used for solving.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'metric').

        <b>Keyword Args:</b><br>
            - metric (distance_metric): Metric that is used for distance calculation between two points.

        """
        # All state is owned by the BSAS base class; MBSAS only changes how processing is done.
        super().__init__(data, maximum_clusters, threshold, ccore, **kwargs)


    def process(self):
        """!
        @brief Performs cluster analysis in line with rules of MBSAS algorithm.

        @remark Results of clustering can be obtained using corresponding get methods.

        @see get_clusters()
        @see get_representatives()

        """

        if self._ccore is True:
            self.__process_by_ccore()
        else:
            self.__process_by_python()


    def __process_by_ccore(self):
        """!
        @brief Performs cluster analysis using the CCORE wrapper.
        @details The metric is wrapped for CCORE and the clusters and representatives returned by the
                  wrapper replace the ones currently stored in the instance.

        """
        ccore_metric = metric_wrapper.create_instance(self._metric)
        self._clusters, self._representatives = mbsas_wrapper(self._data, self._amount, self._threshold,
                                                              ccore_metric.get_pointer())


    def __process_by_python(self):
        """!
        @brief Performs cluster analysis using the pure Python implementation.
        @details The first pass allocates clusters: a point starts a new cluster when its distance to the nearest
                  existing cluster exceeds the threshold and the cluster limit has not been reached. The second
                  pass appends every remaining point to its nearest cluster and updates that cluster's representative.

        """
        first_point = self._data[0]

        # Start from a clean state so that repeated calls of process() do not accumulate results
        # (the CCORE path overwrites the results in the same way).
        # NOTE(review): representatives reference the input points themselves; if _update_representative()
        # modifies a representative in place, the input data is modified as well - confirm against bsas.
        self._clusters = [[0]]
        self._representatives = [first_point]

        skipped_objects = []

        # Pass 1: determine clusters and their initial representatives.
        for index_point in range(1, len(self._data)):
            point = self._data[index_point]
            _, distance = self._find_nearest_cluster(point)

            if (distance > self._threshold) and (len(self._clusters) < self._amount):
                self._representatives.append(point)
                self._clusters.append([index_point])
            else:
                skipped_objects.append(index_point)

        # Pass 2: assign points that did not become representatives to the nearest cluster.
        for index_point in skipped_objects:
            point = self._data[index_point]
            index_cluster, _ = self._find_nearest_cluster(point)

            self._clusters[index_cluster].append(index_point)
            self._update_representative(index_cluster, point)
def __init__(self, data, maximum_clusters, threshold, ccore=True, **kwargs)
Creates MBSAS algorithm.
Definition: mbsas.py:66
Class represents BSAS clustering algorithm - basic sequential algorithmic scheme. ...
Definition: bsas.py:77
Cluster analysis algorithm: BSAS (Basic Sequential Algorithmic Scheme).
Definition: bsas.py:1
Class represents MBSAS (Modified Basic Sequential Algorithmic Scheme).
Definition: mbsas.py:34
def process(self)
Performs cluster analysis in line with rules of MBSAS algorithm.
Definition: mbsas.py:83
def _find_nearest_cluster(self, point)
Find nearest cluster to the specified point.
Definition: bsas.py:217
def __prcess_by_python(self)
Definition: bsas.py:166
def __process_by_ccore(self)
Definition: bsas.py:161
def _update_representative(self, index_cluster, point)
Update cluster representative in line with new cluster size and added point to it.
Definition: bsas.py:238