pyclustering  0.10.1
pyclustering is a Python, C++ data mining library.
ttsas.py
1 """!
2 
3 @brief Cluster analysis algorithm: TTSAS (Two-Threshold Sequential Algorithmic Scheme).
4 @details Implementation based on paper @cite book::pattern_recognition::2009.
5 
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2020
8 @copyright BSD-3-Clause
9 
10 """
11 
12 
13 from pyclustering.core.ttsas_wrapper import ttsas as ttsas_wrapper
14 from pyclustering.core.metric_wrapper import metric_wrapper
15 
16 from pyclustering.cluster.bsas import bsas
17 
18 
class ttsas(bsas):
    """!
    @brief Class represents TTSAS (Two-Threshold Sequential Algorithmic Scheme).
    @details Clustering results of BSAS and MBSAS are strongly dependent on the order in which the points appear
              in the data. TTSAS helps to overcome this shortcoming by using two threshold parameters. The first -
              if the distance to the nearest cluster is less than or equal to the first threshold then the point is
              assigned to the cluster. The second - if the distance to the nearest cluster is greater than the
              second threshold then a new cluster is allocated. A point whose distance falls between the two
              thresholds is left unassigned and is reconsidered on a later pass over the data.

    Code example of TTSAS usage:
    @code
        from pyclustering.cluster.bsas import bsas_visualizer
        from pyclustering.cluster.ttsas import ttsas
        from pyclustering.samples.definitions import SIMPLE_SAMPLES
        from pyclustering.utils import read_sample

        # Read data sample from 'Simple03.data'.
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)

        # Prepare algorithm's parameters.
        threshold1 = 1.0
        threshold2 = 2.0

        # Create instance of TTSAS algorithm.
        ttsas_instance = ttsas(sample, threshold1, threshold2)
        ttsas_instance.process()

        # Get clustering results.
        clusters = ttsas_instance.get_clusters()
        representatives = ttsas_instance.get_representatives()

        # Display results using BSAS visualizer.
        bsas_visualizer.show_clusters(sample, clusters, representatives)
    @endcode

    @see pyclustering.cluster.bsas, pyclustering.cluster.mbsas

    """

    def __init__(self, data, threshold1, threshold2, ccore=True, **kwargs):
        """!
        @brief Creates TTSAS algorithm.

        @param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple.
        @param[in] threshold1: Dissimilarity level (distance) between point and its closest cluster, if the distance is
                    less than or equal to 'threshold1' value then point is assigned to the cluster.
        @param[in] threshold2: Dissimilarity level (distance) between point and its closest cluster, if the distance is
                    greater than 'threshold2' value then point is considered as a new cluster.
        @param[in] ccore (bool): If True then DLL CCORE (C++ solution) will be used for solving.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'metric').

        <b>Keyword Args:</b><br>
            - metric (distance_metric): Metric that is used for distance calculation between two points.

        """

        self._threshold2 = threshold2

        # Every point starts as "skipped" (not yet assigned to any cluster).
        self._amount_skipped_objects = len(data)
        self._skipped_objects = [True] * len(data)

        # 'len(data)' is forwarded as the second base-class argument - presumably the cluster limit,
        # so that it never restricts TTSAS (verify against the 'bsas' constructor).
        super().__init__(data, len(data), threshold1, ccore, **kwargs)


    def process(self):
        """!
        @brief Performs cluster analysis in line with rules of TTSAS algorithm.

        @return (ttsas) Returns itself (TTSAS instance).

        @see get_clusters()
        @see get_representatives()

        """

        if self._ccore is True:
            self.__process_by_ccore()
        else:
            self.__process_by_python()

        return self


    def __process_by_ccore(self):
        """!
        @brief Performs cluster analysis using the CCORE wrapper and stores obtained clusters and representatives.

        """
        ccore_metric = metric_wrapper.create_instance(self._metric)
        self._clusters, self._representatives = ttsas_wrapper(self._data, self._threshold, self._threshold2, ccore_metric.get_pointer())


    def __process_by_python(self):
        """!
        @brief Performs cluster analysis using the Python implementation.
        @details Passes over the data are repeated until no skipped (unassigned) point remains.

        """
        # 'changes' is the amount of points that were assigned during the previous pass. Zero (the initial
        # state, or a pass without any assignment) makes the next pass allocate a new cluster for the first
        # skipped point, therefore every second pass at the latest makes progress and the loop terminates.
        changes = 0
        while self._amount_skipped_objects != 0:
            previous_amount = self._amount_skipped_objects
            self.__process_objects(changes)

            changes = previous_amount - self._amount_skipped_objects


    def __process_objects(self, changes):
        """!
        @brief Performs one pass over points that are not assigned to any cluster yet.

        @param[in] changes (uint): Amount of points that were assigned during the previous pass.

        """
        index_point = self._skipped_objects.index(True)

        if changes == 0:
            # Nothing was assigned on the previous pass: force the first skipped point to be a new cluster.
            self.__allocate_cluster(index_point, self._data[index_point])
            index_point += 1

        for i in range(index_point, len(self._data)):
            if self._skipped_objects[i]:
                self.__process_skipped_object(i)


    def __process_skipped_object(self, index_point):
        """!
        @brief Tries to assign the specified skipped point in line with the two-threshold rule.
        @details The point stays skipped if distance to the nearest cluster is between the thresholds.

        @param[in] index_point (uint): Index of the point in the input data.

        """
        point = self._data[index_point]

        index_cluster, distance = self._find_nearest_cluster(point)

        if distance <= self._threshold:
            self.__append_to_cluster(index_cluster, index_point, point)
        elif distance > self._threshold2:
            self.__allocate_cluster(index_point, point)


    def __append_to_cluster(self, index_cluster, index_point, point):
        """!
        @brief Appends the point to the existing cluster, updates its representative and marks the point as processed.

        @param[in] index_cluster (uint): Index of the cluster where the point is placed.
        @param[in] index_point (uint): Index of the point in the input data.
        @param[in] point (list): The point itself, it is used to update the cluster representative.

        """
        self._clusters[index_cluster].append(index_point)
        self._update_representative(index_cluster, point)

        self._amount_skipped_objects -= 1
        self._skipped_objects[index_point] = False


    def __allocate_cluster(self, index_point, point):
        """!
        @brief Allocates a new cluster that consists of the specified point and marks the point as processed.

        @param[in] index_point (uint): Index of the point in the input data.
        @param[in] point (list): The point itself, it becomes the representative of the new cluster.

        """
        self._clusters.append([index_point])
        self._representatives.append(point)

        self._amount_skipped_objects -= 1
        self._skipped_objects[index_point] = False
pyclustering.cluster.bsas.bsas.__prcess_by_python
def __prcess_by_python(self)
Definition: bsas.py:161
pyclustering.cluster.ttsas.ttsas.__process_skipped_object
def __process_skipped_object(self, index_point)
Definition: ttsas.py:127
pyclustering.cluster.ttsas.ttsas._skipped_objects
_skipped_objects
Definition: ttsas.py:77
pyclustering.cluster.ttsas.ttsas.__allocate_cluster
def __allocate_cluster(self, index_point, point)
Definition: ttsas.py:146
pyclustering.cluster.bsas.bsas._update_representative
def _update_representative(self, index_cluster, point)
Update cluster representative in line with new cluster size and added point to it.
Definition: bsas.py:233
pyclustering.cluster.bsas.bsas._representatives
_representatives
Definition: bsas.py:127
pyclustering.cluster.bsas.bsas.__process_by_ccore
def __process_by_ccore(self)
Definition: bsas.py:156
pyclustering.cluster.bsas.bsas._metric
_metric
Definition: bsas.py:123
pyclustering.cluster.bsas
Cluster analysis algorithm: BSAS (Basic Sequential Algorithmic Scheme).
Definition: bsas.py:1
pyclustering.cluster.bsas.bsas._find_nearest_cluster
def _find_nearest_cluster(self, point)
Find nearest cluster to the specified point.
Definition: bsas.py:212
pyclustering.cluster.ttsas.ttsas
Class represents TTSAS (Two-Threshold Sequential Algorithmic Scheme).
Definition: ttsas.py:19
pyclustering.cluster.bsas.bsas._ccore
_ccore
Definition: bsas.py:124
pyclustering.cluster.bsas.bsas._data
_data
Definition: bsas.py:120
pyclustering.cluster.ttsas.ttsas.__append_to_cluster
def __append_to_cluster(self, index_cluster, index_point, point)
Definition: ttsas.py:138
pyclustering.cluster.ttsas.ttsas.__init__
def __init__(self, data, threshold1, threshold2, ccore=True, **kwargs)
Creates TTSAS algorithm.
Definition: ttsas.py:58
pyclustering.cluster.ttsas.ttsas._amount_skipped_objects
_amount_skipped_objects
Definition: ttsas.py:76
pyclustering.cluster.bsas.bsas._threshold
_threshold
Definition: bsas.py:122
pyclustering.cluster.ttsas.ttsas._threshold2
_threshold2
Definition: ttsas.py:75
pyclustering.cluster.ttsas.ttsas.process
def process(self)
Performs cluster analysis in line with rules of TTSAS algorithm.
Definition: ttsas.py:82
pyclustering.cluster.ttsas.ttsas.__process_objects
def __process_objects(self, changes)
Definition: ttsas.py:115
pyclustering.cluster.bsas.bsas._clusters
_clusters
Definition: bsas.py:126
pyclustering.cluster.bsas.bsas
Class represents BSAS clustering algorithm - basic sequential algorithmic scheme.
Definition: bsas.py:62