mbsas.py
1 """!
2 
3 @brief Cluster analysis algorithm: MBSAS (Modified Basic Sequential Algorithmic Scheme).
4 @details Implementation based on paper @cite book::pattern_recognition::2009.
5 
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2020
8 @copyright GNU Public License
9 
10 @cond GNU_PUBLIC_LICENSE
11  PyClustering is free software: you can redistribute it and/or modify
12  it under the terms of the GNU General Public License as published by
13  the Free Software Foundation, either version 3 of the License, or
14  (at your option) any later version.
15 
16  PyClustering is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  GNU General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23 @endcond
24 
25 """
26 
27 
28 from pyclustering.core.mbsas_wrapper import mbsas as mbsas_wrapper
29 from pyclustering.core.metric_wrapper import metric_wrapper
30 
31 from pyclustering.cluster.bsas import bsas
32 
33 
class mbsas(bsas):
    """!
    @brief Class represents MBSAS (Modified Basic Sequential Algorithmic Scheme).
    @details Interface of MBSAS algorithm is the same as for BSAS. This algorithm performs clustering in two steps.
              The first step determines the amount of clusters: a point becomes the representative of a new
              cluster when its distance to the nearest existing representative exceeds the threshold and the
              maximum amount of clusters is not reached yet. The second step assigns every point that was not
              marked as a cluster representative to its nearest cluster.

    Code example of MBSAS usage:
    @code
        from pyclustering.cluster.bsas import bsas_visualizer
        from pyclustering.cluster.mbsas import mbsas
        from pyclustering.utils import read_sample
        from pyclustering.samples.definitions import SIMPLE_SAMPLES

        # Read data sample from 'Simple02.data'.
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE2)

        # Prepare algorithm's parameters.
        max_clusters = 3
        threshold = 1.0

        # Create instance of MBSAS algorithm.
        mbsas_instance = mbsas(sample, max_clusters, threshold)
        mbsas_instance.process()

        # Get clustering results.
        clusters = mbsas_instance.get_clusters()
        representatives = mbsas_instance.get_representatives()

        # Display results.
        bsas_visualizer.show_clusters(sample, clusters, representatives)
    @endcode

    @see pyclustering.cluster.bsas, pyclustering.cluster.ttsas

    """

    def __init__(self, data, maximum_clusters, threshold, ccore=True, **kwargs):
        """!
        @brief Creates MBSAS algorithm.

        @param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple.
        @param[in] maximum_clusters: Maximum allowable number of clusters that can be allocated during processing.
        @param[in] threshold: Threshold of dissimilarity (maximum distance) between points.
        @param[in] ccore (bool): If True then DLL CCORE (C++ solution) will be used for solving.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'metric').

        <b>Keyword Args:</b><br>
            - metric (distance_metric): Metric that is used for distance calculation between two points.

        """
        # All arguments are forwarded unchanged: MBSAS shares its whole configuration with BSAS.
        super().__init__(data, maximum_clusters, threshold, ccore, **kwargs)


    def process(self):
        """!
        @brief Performs cluster analysis in line with MBSAS algorithm.

        @return (mbsas) Returns itself (MBSAS instance).

        @see get_clusters()
        @see get_representatives()

        """

        if self._ccore is True:
            self.__process_by_ccore()
        else:
            self.__process_by_python()

        return self


    def __process_by_ccore(self):
        """!
        @brief Performs cluster analysis using the CCORE wrapper.
        @details The metric is converted to its CCORE counterpart and the clusters and representatives returned
                  by the wrapper replace the current results.

        """
        ccore_metric = metric_wrapper.create_instance(self._metric)
        self._clusters, self._representatives = mbsas_wrapper(self._data, self._amount, self._threshold, ccore_metric.get_pointer())


    def __process_by_python(self):
        """!
        @brief Performs cluster analysis using the Python implementation.
        @details The first pass walks over the points in order and allocates clusters; points that do not open a
                  new cluster are remembered and assigned to their nearest cluster in the second pass.

        """
        import copy

        # Start from a clean state so that a repeated call of process() does not accumulate clusters of the
        # previous run - the CCORE path replaces both containers on every call as well.
        #
        # Representatives are stored as shallow copies of the points.
        # NOTE(review): presumably '_update_representative' adjusts a representative in place - confirm in bsas;
        # with a copy such an adjustment can never write through to the caller's input data.
        self._clusters = [[0]]
        self._representatives = [copy.copy(self._data[0])]

        skipped_objects = []

        # First pass: determine clusters and their initial representatives.
        for i in range(1, len(self._data)):
            point = self._data[i]
            _, distance = self._find_nearest_cluster(point)

            if (distance > self._threshold) and (len(self._clusters) < self._amount):
                self._representatives.append(copy.copy(point))
                self._clusters.append([i])
            else:
                skipped_objects.append(i)

        # Second pass: assign the remaining points to the nearest cluster.
        for i in skipped_objects:
            point = self._data[i]
            index_cluster, _ = self._find_nearest_cluster(point)

            self._clusters[index_cluster].append(i)
            self._update_representative(index_cluster, point)
def __init__(self, data, maximum_clusters, threshold, ccore=True, **kwargs)
Creates MBSAS algorithm.
Definition: mbsas.py:71
Class represents BSAS clustering algorithm - basic sequential algorithmic scheme. ...
Definition: bsas.py:77
Cluster analysis algorithm: BSAS (Basic Sequential Algorithmic Scheme).
Definition: bsas.py:1
Class represents MBSAS (Modified Basic Sequential Algorithmic Scheme).
Definition: mbsas.py:34
def process(self)
Performs cluster analysis in line with MBSAS algorithm.
Definition: mbsas.py:88
def _find_nearest_cluster(self, point)
Find nearest cluster to the specified point.
Definition: bsas.py:227
def __prcess_by_python(self)
Definition: bsas.py:176
def __process_by_ccore(self)
Definition: bsas.py:171
def _update_representative(self, index_cluster, point)
Update cluster representative in line with new cluster size and added point to it.
Definition: bsas.py:248