somsc.py
1 """!
2 
3 @brief Cluster analysis algorithm: SOM-SC (Self-Organized Feature Map for Simple Clustering)
4 @details There is no dedicated paper on which the SOM-SC implementation is based. Algorithm SOM-SC is an adaptation of SOM for
5  cluster analysis in a simple way. Basic idea: the amount of clusters that should be allocated defines the amount of neurons in
6  the self-organized map. SOM-SC can be considered as a neural network implementation of the K-Means algorithm.
7  The underlying SOM implementation is based on paper @cite article::nnet::som::1.
8 
9 @authors Andrei Novikov (pyclustering@yandex.ru)
10 @date 2014-2020
11 @copyright GNU Public License
12 
13 @cond GNU_PUBLIC_LICENSE
14  PyClustering is free software: you can redistribute it and/or modify
15  it under the terms of the GNU General Public License as published by
16  the Free Software Foundation, either version 3 of the License, or
17  (at your option) any later version.
18 
19  PyClustering is distributed in the hope that it will be useful,
20  but WITHOUT ANY WARRANTY; without even the implied warranty of
21  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22  GNU General Public License for more details.
23 
24  You should have received a copy of the GNU General Public License
25  along with this program. If not, see <http://www.gnu.org/licenses/>.
26 @endcond
27 
28 """
29 
30 
31 from pyclustering.core.wrapper import ccore_library
32 from pyclustering.cluster.encoder import type_encoding
33 from pyclustering.nnet.som import som, som_parameters
34 from pyclustering.nnet.som import type_conn
35 
36 
class somsc:
    """!
    @brief Class represents a simple clustering algorithm based on the self-organized feature map.
    @details This algorithm uses amount of clusters that should be allocated as a size of SOM map. Captured
              objects by neurons are considered as clusters. The algorithm is designed to process data with Gaussian
              distribution that has spherical forms.

    Example:
    @code
        from pyclustering.cluster import cluster_visualizer
        from pyclustering.cluster.somsc import somsc
        from pyclustering.samples.definitions import FCPS_SAMPLES
        from pyclustering.utils import read_sample

        # Load list of points for cluster analysis
        sample = read_sample(FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS)

        # Create instance of SOM-SC algorithm to allocated two clusters
        somsc_instance = somsc(sample, 2)

        # Run cluster analysis and obtain results
        somsc_instance.process()
        clusters = somsc_instance.get_clusters()

        # Visualize clustering results.
        visualizer = cluster_visualizer()
        visualizer.append_clusters(clusters, sample)
        visualizer.show()
    @endcode

    """

    def __init__(self, data, amount_clusters, epouch=100, ccore=True, **kwargs):
        """!
        @brief Creates SOM-SC (Self Organized Map for Simple Clustering) algorithm for clustering analysis.

        @param[in] data (list): List of points that are used for processing.
        @param[in] amount_clusters (uint): Amount of clusters that should be allocated.
        @param[in] epouch (uint): Number of epochs for training of SOM.
        @param[in] ccore (bool): If it is True then CCORE implementation will be used for clustering analysis.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: `random_state`).

        <b>Keyword Args:</b><br>
            - random_state (int): Seed for random state (by default is `None`, current system time is used).

        @throw ValueError if `data` is empty, `amount_clusters` is not positive or `epouch` is negative.

        """

        self.__data_pointer = data
        self.__amount_clusters = amount_clusters
        self.__epouch = epouch
        self.__ccore = ccore
        self.__random_state = kwargs.get('random_state', None)

        # The network is created by 'process()'; None means that clustering has not been performed yet.
        self.__network = None

        # CCORE is used only if it was requested and the library reports itself as workable.
        if self.__ccore is True:
            self.__ccore = ccore_library.workable()

        self.__verify_arguments()


    def process(self):
        """!
        @brief Performs cluster analysis by competition between neurons in self-organized map.

        @return (somsc) Returns itself (SOM Simple Clustering instance).

        @see get_clusters()

        """

        params = som_parameters()
        params.random_state = self.__random_state

        # One row with 'amount_clusters' columns: each neuron of the map corresponds to one cluster.
        self.__network = som(1, self.__amount_clusters, type_conn.grid_four, params, self.__ccore)
        self.__network.train(self.__data_pointer, self.__epouch, True)

        return self


    def predict(self, points):
        """!
        @brief Calculates the closest cluster to each point.

        @param[in] points (array_like): Points for which closest clusters are calculated.

        @return (list) List of closest clusters for each point. Each cluster is denoted by index. Return empty
                 collection if 'process()' method was not called.

        """

        # Without a trained network there is nothing to simulate - honour the documented contract.
        if self.__network is None:
            return []

        return [self.__network.simulate(point) for point in points]


    def get_clusters(self):
        """!
        @brief Returns list of allocated clusters, each cluster contains indexes of objects in list of data.

        @return (list) Allocated clusters. Return empty collection if 'process()' method was not called.

        @see process()

        """

        # Same convention as 'predict()': no trained network means no clusters.
        if self.__network is None:
            return []

        return self.__network.capture_objects


    def get_cluster_encoding(self):
        """!
        @brief Returns clustering result representation type that indicate how clusters are encoded.

        @return (type_encoding) Clustering result representation.

        @see get_clusters()

        """

        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION


    def __verify_arguments(self):
        """!
        @brief Verify input parameters for the algorithm and throw exception in case of incorrectness.

        @throw ValueError if input data is empty, amount of clusters is not positive or amount of epochs is negative.

        """
        # 'len(...) == 0' is used on purpose: the check stays valid for any container that only supports 'len()'.
        if len(self.__data_pointer) == 0:
            raise ValueError("Input data is empty (size: '%d')." % len(self.__data_pointer))

        if self.__amount_clusters <= 0:
            raise ValueError("Amount of clusters (current value: '%d') should be greater than 0." %
                             self.__amount_clusters)

        if self.__epouch < 0:
            raise ValueError("Amount of epouch (current value: '%d') should be greater or equal to 0." %
                             self.__epouch)
def get_cluster_encoding(self)
Returns clustering result representation type that indicate how clusters are encoded.
Definition: somsc.py:147
Represents SOM parameters.
Definition: som.py:89
Module for representing clustering results.
Definition: encoder.py:1
def __verify_arguments(self)
Verify input parameters for the algorithm and throw exception in case of incorrectness.
Definition: somsc.py:160
def get_clusters(self)
Returns list of allocated clusters, each cluster contains indexes of objects in list of data...
Definition: somsc.py:136
Class represents a simple clustering algorithm based on the self-organized feature map...
Definition: somsc.py:37
def process(self)
Performs cluster analysis by competition between neurons in self-organized map.
Definition: somsc.py:98
def __init__(self, data, amount_clusters, epouch=100, ccore=True, kwargs)
Creates SOM-SC (Self Organized Map for Simple Clustering) algorithm for clustering analysis...
Definition: somsc.py:69
Neural Network: Self-Organized Feature Map.
Definition: som.py:1
def predict(self, points)
Calculates the closest cluster to each point.
Definition: somsc.py:117
Represents self-organized feature map (SOM).
Definition: som.py:117