pyclustering  0.10.1
pyclustering is a Python, C++ data mining library.
mbsas.py
1 """!
2 
3 @brief Cluster analysis algorithm: MBSAS (Modified Basic Sequential Algorithmic Scheme).
4 @details Implementation based on paper @cite book::pattern_recognition::2009.
5 
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2020
8 @copyright BSD-3-Clause
9 
10 """
11 
12 
13 from pyclustering.core.mbsas_wrapper import mbsas as mbsas_wrapper
14 from pyclustering.core.metric_wrapper import metric_wrapper
15 
16 from pyclustering.cluster.bsas import bsas
17 
18 
class mbsas(bsas):
    """!
    @brief Class represents MBSAS (Modified Basic Sequential Algorithmic Scheme).
    @details Interface of MBSAS algorithm is the same as for BSAS. This algorithm performs clustering in two steps.
              The first step determines the amount of clusters and their initial representatives. The second step
              assigns the points that were not marked as cluster representatives to the nearest clusters.

    Code example of MBSAS usage:
    @code
        from pyclustering.cluster.bsas import bsas_visualizer
        from pyclustering.cluster.mbsas import mbsas
        from pyclustering.utils import read_sample
        from pyclustering.samples.definitions import SIMPLE_SAMPLES

        # Read data sample from 'Simple02.data'.
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE2)

        # Prepare algorithm's parameters.
        max_clusters = 3
        threshold = 1.0

        # Create instance of MBSAS algorithm.
        mbsas_instance = mbsas(sample, max_clusters, threshold)
        mbsas_instance.process()

        # Get clustering results.
        clusters = mbsas_instance.get_clusters()
        representatives = mbsas_instance.get_representatives()

        # Display results.
        bsas_visualizer.show_clusters(sample, clusters, representatives)
    @endcode

    @see pyclustering.cluster.bsas, pyclustering.cluster.ttsas

    """

    def __init__(self, data, maximum_clusters, threshold, ccore=True, **kwargs):
        """!
        @brief Creates MBSAS algorithm.

        @param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple.
        @param[in] maximum_clusters: Maximum allowable number of clusters that can be allocated during processing.
        @param[in] threshold: Threshold of dissimilarity (maximum distance) between points.
        @param[in] ccore (bool): If True then DLL CCORE (C++ solution) will be used for solving.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'metric').

        <b>Keyword Args:</b><br>
            - metric (distance_metric): Metric that is used for distance calculation between two points.

        """
        # All arguments are forwarded unchanged: MBSAS shares its state and parameters with BSAS.
        super().__init__(data, maximum_clusters, threshold, ccore, **kwargs)


    def process(self):
        """!
        @brief Performs cluster analysis in line with MBSAS algorithm.

        @return (mbsas) Returns itself (MBSAS instance).

        @see get_clusters()
        @see get_representatives()

        """

        if self._ccore is True:
            self.__process_by_ccore()
        else:
            self.__process_by_python()

        return self


    def __process_by_ccore(self):
        """!
        @brief Performs cluster analysis using the CCORE wrapper.
        @details The metric is converted to its CCORE representation and passed to the wrapper together with
                  the data, the maximum amount of clusters and the threshold. The returned pair replaces the
                  stored clusters and representatives.

        """
        ccore_metric = metric_wrapper.create_instance(self._metric)
        self._clusters, self._representatives = mbsas_wrapper(self._data, self._amount, self._threshold, ccore_metric.get_pointer())


    def __process_by_python(self):
        """!
        @brief Performs cluster analysis using the pure Python implementation.
        @details The first pass allocates clusters: a point opens a new cluster when its distance to the nearest
                  existing cluster exceeds the threshold and the cluster limit is not reached yet; every other
                  point is postponed. The second pass assigns each postponed point to its nearest cluster and
                  updates the representative of that cluster.

        """
        # Start from fresh containers instead of appending to the stored ones, so that the result
        # replaces any previous one (as the CCORE path does) and repeated process() calls do not
        # accumulate duplicated clusters. The first point always opens the first cluster.
        # NOTE(review): representatives reference the data points themselves (no copy is made);
        # confirm that bsas._update_representative() does not modify them in place.
        self._clusters = [[0]]
        self._representatives = [self._data[0]]

        skipped_objects = []

        # Pass 1: determine the clusters and their initial representatives.
        for i in range(1, len(self._data)):
            point = self._data[i]
            _, distance = self._find_nearest_cluster(point)

            if (distance > self._threshold) and (len(self._clusters) < self._amount):
                self._representatives.append(point)
                self._clusters.append([i])
            else:
                skipped_objects.append(i)

        # Pass 2: distribute the postponed points between the allocated clusters.
        for i in skipped_objects:
            point = self._data[i]
            index_cluster, _ = self._find_nearest_cluster(point)

            self._clusters[index_cluster].append(i)
            self._update_representative(index_cluster, point)
pyclustering.cluster.mbsas.mbsas.process
def process(self)
Performs cluster analysis in line with MBSAS algorithm.
Definition: mbsas.py:73
pyclustering.cluster.bsas.bsas.__prcess_by_python
def __prcess_by_python(self)
Definition: bsas.py:161
pyclustering.cluster.bsas.bsas._update_representative
def _update_representative(self, index_cluster, point)
Update cluster representative in line with new cluster size and added point to it.
Definition: bsas.py:233
pyclustering.cluster.bsas.bsas._amount
_amount
Definition: bsas.py:121
pyclustering.cluster.bsas.bsas._representatives
_representatives
Definition: bsas.py:127
pyclustering.cluster.bsas.bsas.__process_by_ccore
def __process_by_ccore(self)
Definition: bsas.py:156
pyclustering.cluster.bsas.bsas._metric
_metric
Definition: bsas.py:123
pyclustering.cluster.bsas
Cluster analysis algorithm: BSAS (Basic Sequential Algorithmic Scheme).
Definition: bsas.py:1
pyclustering.cluster.bsas.bsas._find_nearest_cluster
def _find_nearest_cluster(self, point)
Find nearest cluster to the specified point.
Definition: bsas.py:212
pyclustering.cluster.bsas.bsas._ccore
_ccore
Definition: bsas.py:124
pyclustering.cluster.mbsas.mbsas.__init__
def __init__(self, data, maximum_clusters, threshold, ccore=True, **kwargs)
Creates MBSAS algorithm.
Definition: mbsas.py:56
pyclustering.cluster.bsas.bsas._data
_data
Definition: bsas.py:120
pyclustering.cluster.bsas.bsas._threshold
_threshold
Definition: bsas.py:122
pyclustering.cluster.mbsas.mbsas
Class represents MBSAS (Modified Basic Sequential Algorithmic Scheme).
Definition: mbsas.py:19
pyclustering.cluster.bsas.bsas._clusters
_clusters
Definition: bsas.py:126
pyclustering.cluster.bsas.bsas
Class represents BSAS clustering algorithm - basic sequential algorithmic scheme.
Definition: bsas.py:62