ttsas.py
1 """!
2 
3 @brief Cluster analysis algorithm: TTSAS (Two-Threshold Sequential Algorithmic Scheme).
4 @details Implementation based on paper @cite book::pattern_recognition::2009.
5 
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2019
8 @copyright GNU Public License
9 
10 @cond GNU_PUBLIC_LICENSE
11  PyClustering is free software: you can redistribute it and/or modify
12  it under the terms of the GNU General Public License as published by
13  the Free Software Foundation, either version 3 of the License, or
14  (at your option) any later version.
15 
16  PyClustering is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  GNU General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23 @endcond
24 
25 """
26 
27 
28 from pyclustering.core.ttsas_wrapper import ttsas as ttsas_wrapper
29 from pyclustering.core.metric_wrapper import metric_wrapper
30 
31 from pyclustering.cluster.bsas import bsas
32 
33 
class ttsas(bsas):
    """!
    @brief TTSAS (Two-Threshold Sequential Algorithmic Scheme) clustering algorithm.
    @details Clustering results of BSAS and MBSAS strongly depend on the order in which points are presented.
              TTSAS reduces this dependency by using two thresholds. If the distance from a point to its nearest
              cluster does not exceed the first threshold, the point is assigned to that cluster. If the distance
              is greater than the second threshold, a new cluster is allocated for the point. Points that fall
              between the two thresholds are postponed and reconsidered on a later pass.

    Code example of TTSAS usage:
    @code
        from pyclustering.cluster.bsas import bsas_visualizer
        from pyclustering.cluster.ttsas import ttsas
        from pyclustering.samples.definitions import SIMPLE_SAMPLES
        from pyclustering.utils import read_sample

        # Read data sample from 'Simple03.data'.
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)

        # Prepare algorithm's parameters.
        threshold1 = 1.0
        threshold2 = 2.0

        # Create instance of TTSAS algorithm.
        ttsas_instance = ttsas(sample, threshold1, threshold2)
        ttsas_instance.process()

        # Get clustering results.
        clusters = ttsas_instance.get_clusters()
        representatives = ttsas_instance.get_representatives()

        # Display results using BSAS visualizer.
        bsas_visualizer.show_clusters(sample, clusters, representatives)
    @endcode

    @see pyclustering.cluster.bsas, pyclustering.cluster.mbsas

    """

    def __init__(self, data, threshold1, threshold2, ccore=True, **kwargs):
        """!
        @brief Creates TTSAS algorithm.

        @param[in] data (list): Input data that is presented as list of points (objects), each point should be
                    represented by list or tuple.
        @param[in] threshold1: Dissimilarity level (distance) between point and its closest cluster, if the distance
                    does not exceed 'threshold1' then the point is assigned to the cluster.
        @param[in] threshold2: Dissimilarity level (distance) between point and its closest cluster, if the distance
                    is greater than 'threshold2' then the point is considered as a new cluster.
        @param[in] ccore (bool): If True then DLL CCORE (C++ solution) will be used for solving.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'metric').

        <b>Keyword Args:</b><br>
            - metric (distance_metric): Metric that is used for distance calculation between two points.

        """

        amount_points = len(data)

        self._threshold2 = threshold2

        # Every point starts as "skipped" (not yet assigned to any cluster).
        self._amount_skipped_objects = amount_points
        self._skipped_objects = [True] * amount_points

        # The amount of points is forwarded as the second BSAS argument.
        # NOTE(review): presumably this is the maximum amount of clusters, so it never limits allocation -
        # confirm against bsas.
        super().__init__(data, amount_points, threshold1, ccore, **kwargs)

    def process(self):
        """!
        @brief Performs cluster analysis in line with rules of TTSAS algorithm.

        @remark Results of clustering can be obtained using corresponding get methods.

        @see get_clusters()
        @see get_representatives()

        """

        if self._ccore is not True:
            self.__process_by_python()
            return

        self.__process_by_ccore()

    def __process_by_ccore(self):
        """!
        @brief Performs clustering using the CCORE wrapper and stores the returned clusters and representatives.

        """

        native_metric = metric_wrapper.create_instance(self._metric)
        metric_pointer = native_metric.get_pointer()

        self._clusters, self._representatives = ttsas_wrapper(
            self._data, self._threshold, self._threshold2, metric_pointer
        )

    def __process_by_python(self):
        """!
        @brief Performs clustering using the Python implementation.
        @details Passes over the skipped points are repeated until every point is assigned. The amount of points
                  assigned during a pass is handed over to the next pass.

        """

        assigned_on_last_pass = 0
        while self._amount_skipped_objects:
            skipped_before_pass = self._amount_skipped_objects
            self.__process_objects(assigned_on_last_pass)

            assigned_on_last_pass = skipped_before_pass - self._amount_skipped_objects

    def __process_objects(self, changes):
        """!
        @brief Performs a single pass over points that are still skipped.

        @param[in] changes (int): Amount of points that were assigned during the previous pass. If it is zero
                    then the first skipped point is forced to become a new cluster, so that each pass makes progress.

        """

        first_skipped = self._skipped_objects.index(True)

        if changes == 0:
            self.__allocate_cluster(first_skipped, self._data[first_skipped])
            first_skipped += 1

        for candidate_index in range(first_skipped, len(self._data)):
            if self._skipped_objects[candidate_index]:
                self.__process_skipped_object(candidate_index)

    def __process_skipped_object(self, index_point):
        """!
        @brief Tries to assign a skipped point to the nearest cluster or to allocate a new cluster for it.
        @details The point stays skipped when its distance to the nearest cluster lies between the thresholds.

        @param[in] index_point (int): Index of the point in the input data.

        """

        candidate = self._data[index_point]
        nearest_cluster, gap = self._find_nearest_cluster(candidate)

        if gap <= self._threshold:
            self.__append_to_cluster(nearest_cluster, index_point, candidate)
            return

        if gap > self._threshold2:
            self.__allocate_cluster(index_point, candidate)

    def __append_to_cluster(self, index_cluster, index_point, point):
        """!
        @brief Appends the point to an existing cluster, updates its representative and marks the point as processed.

        @param[in] index_cluster (int): Index of the cluster that receives the point.
        @param[in] index_point (int): Index of the point in the input data.
        @param[in] point (list): The point itself.

        """

        self._clusters[index_cluster].append(index_point)
        self._update_representative(index_cluster, point)

        self.__mark_processed(index_point)

    def __allocate_cluster(self, index_point, point):
        """!
        @brief Allocates a new cluster that consists of the point, which also becomes the cluster representative.

        @param[in] index_point (int): Index of the point in the input data.
        @param[in] point (list): The point itself.

        """

        self._clusters.append([index_point])
        self._representatives.append(point)

        self.__mark_processed(index_point)

    def __mark_processed(self, index_point):
        """!
        @brief Marks the point as processed: clears its skipped flag and decreases the amount of skipped points.

        @param[in] index_point (int): Index of the point in the input data.

        """

        self._amount_skipped_objects -= 1
        self._skipped_objects[index_point] = False
Class represents BSAS clustering algorithm - basic sequential algorithmic scheme. ...
Definition: bsas.py:77
Cluster analysis algorithm: BSAS (Basic Sequential Algorithmic Scheme).
Definition: bsas.py:1
def __process_skipped_object(self, index_point)
Definition: ttsas.py:140
Class represents TTSAS (Two-Threshold Sequential Algorithmic Scheme).
Definition: ttsas.py:34
def __allocate_cluster(self, index_point, point)
Definition: ttsas.py:159
def _find_nearest_cluster(self, point)
Find nearest cluster to the specified point.
Definition: bsas.py:221
def __prcess_by_python(self)
Definition: bsas.py:170
def __init__(self, data, threshold1, threshold2, ccore=True, kwargs)
Creates TTSAS algorithm.
Definition: ttsas.py:73
def __process_by_ccore(self)
Definition: bsas.py:165
def __append_to_cluster(self, index_cluster, index_point, point)
Definition: ttsas.py:151
def _update_representative(self, index_cluster, point)
Update cluster representative in line with new cluster size and added point to it.
Definition: bsas.py:242
def process(self)
Performs cluster analysis in line with rules of BSAS algorithm.
Definition: ttsas.py:97
def __process_objects(self, changes)
Definition: ttsas.py:128