ttsas.py
1 """!
2 
3 @brief Cluster analysis algorithm: TTSAS (Two-Threshold Sequential Algorithmic Scheme).
4 @details Implementation based on paper @cite book::pattern_recognition::2009.
5 
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2020
8 @copyright GNU Public License
9 
10 @cond GNU_PUBLIC_LICENSE
11  PyClustering is free software: you can redistribute it and/or modify
12  it under the terms of the GNU General Public License as published by
13  the Free Software Foundation, either version 3 of the License, or
14  (at your option) any later version.
15 
16  PyClustering is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  GNU General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23 @endcond
24 
25 """
26 
27 
28 from pyclustering.core.ttsas_wrapper import ttsas as ttsas_wrapper
29 from pyclustering.core.metric_wrapper import metric_wrapper
30 
31 from pyclustering.cluster.bsas import bsas
32 
33 
class ttsas(bsas):
    """!
    @brief Class represents TTSAS (Two-Threshold Sequential Algorithmic Scheme).
    @details Clustering results of BSAS and MBSAS are strongly dependent on the order in which the points appear
              in the data. TTSAS helps to overcome this shortcoming by using two threshold parameters. The first -
              if the distance to the nearest cluster is less than or equal to the first threshold then the point is
              assigned to the cluster. The second - if the distance to the nearest cluster is greater than the
              second threshold then a new cluster is allocated. A point whose distance lies between the two
              thresholds is left unassigned and is considered again on the next pass over the data.

    Code example of TTSAS usage:
    @code
        from pyclustering.cluster.bsas import bsas_visualizer
        from pyclustering.cluster.ttsas import ttsas
        from pyclustering.samples.definitions import SIMPLE_SAMPLES
        from pyclustering.utils import read_sample

        # Read data sample from 'Simple03.data'.
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)

        # Prepare algorithm's parameters.
        threshold1 = 1.0
        threshold2 = 2.0

        # Create instance of TTSAS algorithm.
        ttsas_instance = ttsas(sample, threshold1, threshold2)
        ttsas_instance.process()

        # Get clustering results.
        clusters = ttsas_instance.get_clusters()
        representatives = ttsas_instance.get_representatives()

        # Display results using BSAS visualizer.
        bsas_visualizer.show_clusters(sample, clusters, representatives)
    @endcode

    @see pyclustering.cluster.bsas, pyclustering.cluster.mbsas

    """

    def __init__(self, data, threshold1, threshold2, ccore=True, **kwargs):
        """!
        @brief Creates TTSAS algorithm.

        @param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple.
        @param[in] threshold1: Dissimilarity level (distance) between point and its closest cluster, if the distance is
                    less than or equal to 'threshold1' value then point is assigned to the cluster.
        @param[in] threshold2: Dissimilarity level (distance) between point and its closest cluster, if the distance is
                    greater than 'threshold2' value then point is considered as a new cluster.
        @param[in] ccore (bool): If True then DLL CCORE (C++ solution) will be used for solving.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'metric').

        <b>Keyword Args:</b><br>
            - metric (distance_metric): Metric that is used for distance calculation between two points.

        """

        self._threshold2 = threshold2

        # Every point starts as "skipped" (not yet assigned to any cluster).
        self._amount_skipped_objects = len(data)
        self._skipped_objects = [True] * len(data)

        # The amount of clusters is not limited in TTSAS, so the amount of points is passed as the upper bound.
        super().__init__(data, len(data), threshold1, ccore, **kwargs)


    def process(self):
        """!
        @brief Performs cluster analysis in line with rules of TTSAS algorithm.

        @return (ttsas) Returns itself (TTSAS instance).

        @see get_clusters()
        @see get_representatives()

        """

        if self._ccore is True:
            self.__process_by_ccore()
        else:
            self.__prcess_by_python()

        return self


    def __process_by_ccore(self):
        """!
        @brief Performs cluster analysis using the CCORE wrapper and stores returned clusters and representatives.

        """
        ccore_metric = metric_wrapper.create_instance(self._metric)
        self._clusters, self._representatives = ttsas_wrapper(self._data, self._threshold, self._threshold2,
                                                              ccore_metric.get_pointer())


    def __prcess_by_python(self):
        """!
        @brief Performs cluster analysis using the Python implementation.
        @details Passes over the data are repeated until every point is assigned. The amount of points that were
                  assigned during a pass is forwarded to the next pass.

        """
        changes = 0
        while self._amount_skipped_objects != 0:
            previous_amount = self._amount_skipped_objects
            self.__process_objects(changes)

            changes = previous_amount - self._amount_skipped_objects


    def __process_objects(self, changes):
        """!
        @brief Performs a single pass over the points that are not assigned to any cluster yet.

        @param[in] changes (uint): Amount of points that were assigned during the previous pass.

        """
        index_point = self._skipped_objects.index(True)

        # Nothing was assigned on the previous pass (or this is the first pass): force the first unassigned
        # point to become a new cluster, otherwise the algorithm could loop forever without progress.
        if changes == 0:
            self.__allocate_cluster(index_point, self._data[index_point])
            index_point += 1

        for i in range(index_point, len(self._data)):
            if self._skipped_objects[i]:
                self.__process_skipped_object(i)


    def __process_skipped_object(self, index_point):
        """!
        @brief Tries to assign an unassigned point using both thresholds.
        @details The point is appended to the nearest cluster if the distance does not exceed the first threshold,
                  becomes a new cluster if the distance exceeds the second threshold, and stays unassigned otherwise.

        @param[in] index_point (uint): Index of the point in the input data.

        """
        point = self._data[index_point]

        index_cluster, distance = self._find_nearest_cluster(point)

        if distance <= self._threshold:
            self.__append_to_cluster(index_cluster, index_point, point)
        elif distance > self._threshold2:
            self.__allocate_cluster(index_point, point)


    def __append_to_cluster(self, index_cluster, index_point, point):
        """!
        @brief Appends the point to the existing cluster and updates the cluster representative.

        @param[in] index_cluster (uint): Index of the cluster that receives the point.
        @param[in] index_point (uint): Index of the point in the input data.
        @param[in] point (list): The point itself.

        """
        self._clusters[index_cluster].append(index_point)
        self._update_representative(index_cluster, point)

        self.__mark_processed(index_point)


    def __allocate_cluster(self, index_point, point):
        """!
        @brief Allocates a new cluster that consists of the point; the point becomes the cluster representative.

        @param[in] index_point (uint): Index of the point in the input data.
        @param[in] point (list): The point itself.

        """
        self._clusters.append([index_point])
        self._representatives.append(point)

        self.__mark_processed(index_point)


    def __mark_processed(self, index_point):
        """!
        @brief Marks the point as assigned and decreases the counter of unassigned points.

        @param[in] index_point (uint): Index of the point in the input data.

        """
        self._amount_skipped_objects -= 1
        self._skipped_objects[index_point] = False
Class represents BSAS clustering algorithm - basic sequential algorithmic scheme. ...
Definition: bsas.py:77
Cluster analysis algorithm: BSAS (Basic Sequential Algorithmic Scheme).
Definition: bsas.py:1
def __process_skipped_object(self, index_point)
Definition: ttsas.py:142
Class represents TTSAS (Two-Threshold Sequential Algorithmic Scheme).
Definition: ttsas.py:34
def __allocate_cluster(self, index_point, point)
Definition: ttsas.py:161
def _find_nearest_cluster(self, point)
Find nearest cluster to the specified point.
Definition: bsas.py:227
def __prcess_by_python(self)
Definition: bsas.py:176
def __init__(self, data, threshold1, threshold2, ccore=True, **kwargs)
Creates TTSAS algorithm.
Definition: ttsas.py:73
def __process_by_ccore(self)
Definition: bsas.py:171
def __append_to_cluster(self, index_cluster, index_point, point)
Definition: ttsas.py:153
def _update_representative(self, index_cluster, point)
Update cluster representative in line with new cluster size and added point to it.
Definition: bsas.py:248
def process(self)
Performs cluster analysis in line with rules of TTSAS algorithm.
Definition: ttsas.py:97
def __process_objects(self, changes)
Definition: ttsas.py:130