ttsas.py
1 """!
2 
3 @brief Cluster analysis algorithm: TTSAS (Two-Threshold Sequential Algorithmic Scheme).
4 @details Implementation based on paper @cite book::pattern_recognition::2009.
5 
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2018
8 @copyright GNU Public License
9 
10 @cond GNU_PUBLIC_LICENSE
11  PyClustering is free software: you can redistribute it and/or modify
12  it under the terms of the GNU General Public License as published by
13  the Free Software Foundation, either version 3 of the License, or
14  (at your option) any later version.
15 
16  PyClustering is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  GNU General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23 @endcond
24 
25 """
26 
27 
28 from pyclustering.core.ttsas_wrapper import ttsas as ttsas_wrapper
29 from pyclustering.core.metric_wrapper import metric_wrapper
30 
31 from pyclustering.cluster.bsas import bsas
32 
33 
class ttsas(bsas):
    """!
    @brief Class represents TTSAS (Two-Threshold Sequential Algorithmic Scheme).
    @details Clustering results of BSAS and MBSAS are strongly dependent on the order in which the points are
              presented in the data. TTSAS helps to overcome this shortcoming by using two threshold parameters.
              The first - if the distance to the nearest cluster does not exceed the first threshold then the point
              is assigned to that cluster. The second - if the distance to the nearest cluster is greater than the
              second threshold then a new cluster is allocated. A point whose distance lies between the two
              thresholds is left unassigned and is considered again on the next pass over the data.

    Code example of TTSAS usage:
    @code
        # Read data sample from 'Simple03.data'.
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)

        # Prepare algorithm's parameters.
        threshold1 = 1.0
        threshold2 = 2.0

        # Create instance of TTSAS algorithm (ccore=False - pure Python implementation is used).
        ttsas_instance = ttsas(sample, threshold1, threshold2, False)
        ttsas_instance.process()

        # Get clustering results.
        clusters = ttsas_instance.get_clusters()
        representatives = ttsas_instance.get_representatives()

        # Display results using BSAS visualizer.
        bsas_visualizer.show_clusters(sample, clusters, representatives)
    @endcode

    @see pyclustering.cluster.bsas, pyclustering.cluster.mbsas

    """

    def __init__(self, data, threshold1, threshold2, ccore, **kwargs):
        """!
        @brief Creates TTSAS algorithm.

        @param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple.
        @param[in] threshold1: Dissimilarity level (distance) between point and its closest cluster, if the distance
                    does not exceed 'threshold1' value then point is assigned to the cluster.
        @param[in] threshold2: Dissimilarity level (distance) between point and its closest cluster, if the distance is
                    greater than 'threshold2' value then point is considered as a new cluster.
        @param[in] ccore (bool): If True then DLL CCORE (C++ solution) will be used for solving.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'metric').

        <b>Keyword Args:</b><br>
            - metric (distance_metric): Metric that is used for distance calculation between two points.

        """

        self._threshold2 = threshold2

        # Every point starts as 'skipped' (not assigned to any cluster yet).
        self._amount_skipped_objects = len(data)
        self._skipped_objects = [True] * len(data)

        # NOTE(review): the second argument is presumably the maximum amount of clusters expected by the
        # base class - passing len(data) would make that limit unreachable; confirm against bsas.__init__.
        super().__init__(data, len(data), threshold1, ccore, **kwargs)


    def process(self):
        """!
        @brief Performs cluster analysis in line with rules of TTSAS algorithm.

        @remark Results of clustering can be obtained using corresponding get methods.

        @see get_clusters()
        @see get_representatives()

        """

        if self._ccore is True:
            self.__process_by_ccore()
        else:
            self.__process_by_python()


    def __process_by_ccore(self):
        """!
        @brief Performs cluster analysis using the CCORE wrapper and stores returned clusters and representatives.

        """
        ccore_metric = metric_wrapper.create_instance(self._metric)
        self._clusters, self._representatives = ttsas_wrapper(self._data, self._threshold, self._threshold2, ccore_metric.get_pointer())


    def __process_by_python(self):
        """!
        @brief Performs cluster analysis using the Python implementation.
        @details Passes over the data are repeated until there are no skipped (unassigned) points left.

        """
        changes = 0
        while self._amount_skipped_objects != 0:
            previous_amount = self._amount_skipped_objects
            self.__process_objects(changes)

            # Amount of points that were assigned during the pass that has just finished.
            changes = previous_amount - self._amount_skipped_objects


    def __process_objects(self, changes):
        """!
        @brief Performs one pass over the points that are still skipped.

        @param[in] changes (uint): Amount of points that were assigned during the previous pass.

        """
        index_point = self._skipped_objects.index(True)

        # Nothing was assigned during the previous pass (or this is the very first pass): the first skipped
        # point becomes a new cluster, so every pass assigns at least one point and the outer loop terminates.
        if changes == 0:
            self.__allocate_cluster(index_point, self._data[index_point])
            index_point += 1

        for i in range(index_point, len(self._data)):
            if self._skipped_objects[i] is True:
                self.__process_skipped_object(i)


    def __process_skipped_object(self, index_point):
        """!
        @brief Tries to assign the specified skipped point to a cluster.
        @details The point is appended to the nearest cluster if the distance does not exceed the first threshold,
                  it forms a new cluster if the distance is greater than the second threshold, otherwise it stays
                  skipped.

        @param[in] index_point (uint): Index of the point in the input data.

        """
        point = self._data[index_point]

        index_cluster, distance = self._find_nearest_cluster(point)

        if distance <= self._threshold:
            self.__append_to_cluster(index_cluster, index_point, point)
        elif distance > self._threshold2:
            self.__allocate_cluster(index_point, point)


    def __append_to_cluster(self, index_cluster, index_point, point):
        """!
        @brief Appends the point to the existing cluster, updates its representative and marks the point as processed.

        @param[in] index_cluster (uint): Index of the cluster where the point is placed.
        @param[in] index_point (uint): Index of the point in the input data.
        @param[in] point (list): The point itself.

        """
        self._clusters[index_cluster].append(index_point)
        self._update_representative(index_cluster, point)

        self._amount_skipped_objects -= 1
        self._skipped_objects[index_point] = False


    def __allocate_cluster(self, index_point, point):
        """!
        @brief Allocates a new cluster that consists of the point, which also becomes the cluster representative,
                and marks the point as processed.

        @param[in] index_point (uint): Index of the point in the input data.
        @param[in] point (list): The point itself.

        """
        self._clusters.append([index_point])
        self._representatives.append(point)

        self._amount_skipped_objects -= 1
        self._skipped_objects[index_point] = False
Class represents BSAS clustering algorithm - basic sequential algorithmic scheme. ...
Definition: bsas.py:77
Cluster analysis algorithm: BSAS (Basic Sequential Algorithmic Scheme).
Definition: bsas.py:1
def __process_skipped_object(self, index_point)
Definition: ttsas.py:135
Class represents TTSAS (Two-Threshold Sequential Algorithmic Scheme).
Definition: ttsas.py:34
def __allocate_cluster(self, index_point, point)
Definition: ttsas.py:154
def __init__(self, data, threshold1, threshold2, ccore, **kwargs)
Creates TTSAS algorithm.
Definition: ttsas.py:68
def _find_nearest_cluster(self, point)
Find nearest cluster to the specified point.
Definition: bsas.py:217
def __prcess_by_python(self)
Definition: bsas.py:166
def __process_by_ccore(self)
Definition: bsas.py:161
def __append_to_cluster(self, index_cluster, index_point, point)
Definition: ttsas.py:146
def _update_representative(self, index_cluster, point)
Update cluster representative in line with new cluster size and added point to it.
Definition: bsas.py:238
def process(self)
Performs cluster analysis in line with rules of TTSAS algorithm.
Definition: ttsas.py:92
def __process_objects(self, changes)
Definition: ttsas.py:123