encoder.py
1 """!
2 
3 @brief Module for representing clustering results.
4 
5 @authors Andrei Novikov (pyclustering@yandex.ru)
6 @date 2014-2020
7 @copyright GNU Public License
8 
9 @cond GNU_PUBLIC_LICENSE
10  PyClustering is free software: you can redistribute it and/or modify
11  it under the terms of the GNU General Public License as published by
12  the Free Software Foundation, either version 3 of the License, or
13  (at your option) any later version.
14 
15  PyClustering is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  GNU General Public License for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with this program. If not, see <http://www.gnu.org/licenses/>.
22 @endcond
23 
24 """
25 
26 import math
27 
28 from enum import IntEnum
29 
30 
class type_encoding(IntEnum):
    """!
    @brief Enumeration of encoding types (index labeling, index list separation, object list separation).

    @details Members are integers (`IntEnum`), so they compare equal to their plain integer values.

    """

    ## Each object is labeled with the index of its cluster, for example `[0, 0, 0, 1, 1, 2, 2, 2]`.
    CLUSTER_INDEX_LABELING = 0

    ## Each cluster is a list of object indexes, for example `[[0, 1, 2], [3, 4], [5, 6, 7]]`.
    CLUSTER_INDEX_LIST_SEPARATION = 1

    ## Each cluster is a list of the objects themselves, for example `[[obj1, obj2], [obj3, obj4]]`.
    CLUSTER_OBJECT_LIST_SEPARATION = 2
45 
46 
class cluster_encoder:
    """!
    @brief Provides service to change clustering result representation.
    @details There are three general types of representation:
    1. Index List Separation that is defined by `CLUSTER_INDEX_LIST_SEPARATION`, for example `[[0, 1, 2], [3, 4], [5, 6, 7]]`.
    2. Index Labeling that is defined by `CLUSTER_INDEX_LABELING`, for example `[0, 0, 0, 1, 1, 2, 2, 2]`.
    3. Object List Separation that is defined by `CLUSTER_OBJECT_LIST_SEPARATION`, for example `[[obj1, obj2, obj3], [obj4, obj5], [obj6, obj7, obj8]]`.

    There is an example how to convert default Index List Separation to other types:
    @code
        from pyclustering.utils import read_sample
        from pyclustering.samples.definitions import SIMPLE_SAMPLES

        from pyclustering.cluster.encoder import type_encoding, cluster_encoder
        from pyclustering.cluster.kmeans import kmeans

        # load list of points for cluster analysis
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)

        # create instance of K-Means algorithm
        kmeans_instance = kmeans(sample, [[3.0, 5.1], [6.5, 8.6]])

        # run cluster analysis and obtain results
        kmeans_instance.process()
        clusters = kmeans_instance.get_clusters()
        print("Index List Separation:", clusters)

        # by default k-means returns representation CLUSTER_INDEX_LIST_SEPARATION
        type_repr = kmeans_instance.get_cluster_encoding()
        encoder = cluster_encoder(type_repr, clusters, sample)

        # change representation from index list to label list
        encoder.set_encoding(type_encoding.CLUSTER_INDEX_LABELING)
        print("Index Labeling:", encoder.get_clusters())

        # change representation from label to object list
        encoder.set_encoding(type_encoding.CLUSTER_OBJECT_LIST_SEPARATION)
        print("Object List Separation:", encoder.get_clusters())
    @endcode

    Output of the code above is following:
    @code
        Index List Separation: [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]
        Index Labeling: [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
        Object List Separation: [[[3.522979, 5.487981], [3.768699, 5.364477], [3.423602, 5.4199], [3.803905, 5.389491], [3.93669, 5.663041]], [[6.968136, 7.755556], [6.750795, 7.269541], [6.593196, 7.850364], [6.978178, 7.60985], [6.554487, 7.498119]]]
    @endcode

    If an index or object that exists in the input data is not present in any cluster, then it is marked as `NaN`
    in case of Index Labeling. Here is an example:
    @code
        from pyclustering.cluster.encoder import type_encoding, cluster_encoder

        # An input data.
        sample = [[1.0, 1.2], [1.2, 2.3], [114.3, 54.1], [2.2, 1.4], [5.3, 1.3]]

        # Clusters do not contain the object with index 2 ([114.3, 54.1]) because it is an outlier.
        clusters = [[0, 1], [3, 4]]

        encoder = cluster_encoder(type_encoding.CLUSTER_INDEX_LIST_SEPARATION, clusters, sample)
        encoder.set_encoding(type_encoding.CLUSTER_INDEX_LABELING)

        print("Index Labeling:", encoder.get_clusters())
    @endcode

    Here is an output of the code above. Pay attention to `NaN` value for the object with index 2 `[114.3, 54.1]`.
    @code
        Index Labeling: [0, 0, nan, 1, 1]
    @endcode

    """

    def __init__(self, encoding, clusters, data):
        """!
        @brief Constructor of clustering result representor.

        @param[in] encoding (type_encoding): Type of clusters representation (Index List, Object List or Labels).
        @param[in] clusters (list): Clusters that were allocated from an input data.
        @param[in] data (list): Data that was used for cluster analysis.

        @see type_encoding

        """

        self.__type_representation = encoding
        self.__clusters = clusters
        self.__data = data


    @property
    def get_encoding(self):
        """!
        @brief Returns current cluster representation.
        @details This member is declared as a property, so it is read as `encoder.get_encoding`
                  (without parentheses).

        @return (type_encoding) Current type of clusters representation.

        """
        return self.__type_representation


    def get_clusters(self):
        """!
        @brief Returns clusters that are represented in line with the type reported by `get_encoding`.

        @return (list) Clusters in the current representation.

        @see get_encoding

        """
        return self.__clusters


    def get_data(self):
        """!
        @brief Returns data that was used for cluster analysis.

        @return (list) The data object that was passed to the constructor.

        """
        return self.__data


    def set_encoding(self, encoding):
        """!
        @brief Change clusters encoding to specified type (Index List, Object List, Labeling).
        @details Stored clusters are converted from the current representation to the requested one.
                  Nothing is done when the requested representation is already the current one.

        @param[in] encoding (type_encoding): New type of clusters representation.

        @return (cluster_encoder) Return itself.

        """

        if encoding == self.__type_representation:
            return self

        # In every pair below the `else` branch is the remaining third representation,
        # because the "same representation" case has already been handled above.
        if self.__type_representation == type_encoding.CLUSTER_INDEX_LABELING:
            if encoding == type_encoding.CLUSTER_INDEX_LIST_SEPARATION:
                self.__clusters = self.__convert_label_to_index()
            else:
                self.__clusters = self.__convert_label_to_object()

        elif self.__type_representation == type_encoding.CLUSTER_INDEX_LIST_SEPARATION:
            if encoding == type_encoding.CLUSTER_INDEX_LABELING:
                self.__clusters = self.__convert_index_to_label()
            else:
                self.__clusters = self.__convert_index_to_object()

        else:
            if encoding == type_encoding.CLUSTER_INDEX_LABELING:
                self.__clusters = self.__convert_object_to_label()
            else:
                self.__clusters = self.__convert_object_to_index()

        self.__type_representation = encoding
        return self


    def __find_object_index(self, data_object, positions):
        """!
        @brief Finds position of the object in the input data and remembers it.
        @details Equal objects are distinguished by their order of appearance: the search for an object
                  that has already been met continues right after its previously found position.

        @param[in] data_object (any): Object from a cluster that should be located in the input data.
        @param[in,out] positions (dict): Last found position for each object, keyed by `str(object)`.

        @return (int) Index of the object in the input data.

        """
        key = str(data_object)  # objects such as lists are not hashable, their text form is used as a key
        search_from = positions[key] + 1 if key in positions else 0

        index_object = self.__data.index(data_object, search_from)
        positions[key] = index_object
        return index_object


    def __count_labeled_clusters(self):
        """!
        @brief Returns amount of clusters that is encoded by labels (the biggest label plus one).
        @details `NaN` labels are skipped explicitly: `max()` over a sequence that contains `NaN` depends on
                  the position of `NaN` and may return `NaN` itself. Zero is returned when there is no
                  labeled object at all.

        """
        labels = [label for label in self.__clusters if not math.isnan(label)]
        return max(labels) + 1 if labels else 0


    def __convert_index_to_label(self):
        """!
        @brief Converts Index List Separation to Index Labeling.

        @return (list) Cluster label for each object of the data, `NaN` for objects without a cluster.

        """
        labels = [float('NaN')] * len(self.__data)

        for index_cluster, cluster in enumerate(self.__clusters):
            for index_object in cluster:
                labels[index_object] = index_cluster

        return labels


    def __convert_index_to_object(self):
        """!
        @brief Converts Index List Separation to Object List Separation.

        @return (list) Clusters where each object index is replaced by the object itself.

        """
        return [[self.__data[index_object] for index_object in cluster] for cluster in self.__clusters]


    def __convert_object_to_label(self):
        """!
        @brief Converts Object List Separation to Index Labeling.

        @return (list) Cluster label for each object of the data, `NaN` for objects without a cluster.

        """
        positions = dict()
        labels = [float('NaN')] * len(self.__data)

        for index_cluster, cluster in enumerate(self.__clusters):
            for data_object in cluster:
                labels[self.__find_object_index(data_object, positions)] = index_cluster

        return labels


    def __convert_object_to_index(self):
        """!
        @brief Converts Object List Separation to Index List Separation.

        @return (list) Clusters where each object is replaced by its index in the data.

        """
        positions = dict()  # shared by all clusters so that equal objects get different indexes
        return [[self.__find_object_index(data_object, positions) for data_object in cluster]
                for cluster in self.__clusters]


    def __convert_label_to_index(self):
        """!
        @brief Converts Index Labeling to Index List Separation.

        @return (list) Clusters as lists of object indexes; objects labeled by `NaN` are skipped.

        """
        clusters = [[] for _ in range(self.__count_labeled_clusters())]

        for index_object in range(len(self.__data)):
            index_cluster = self.__clusters[index_object]
            if not math.isnan(index_cluster):
                clusters[index_cluster].append(index_object)

        return clusters


    def __convert_label_to_object(self):
        """!
        @brief Converts Index Labeling to Object List Separation.

        @return (list) Clusters as lists of objects; objects labeled by `NaN` are skipped.

        """
        clusters = [[] for _ in range(self.__count_labeled_clusters())]

        for index_object, data_object in enumerate(self.__data):
            index_cluster = self.__clusters[index_object]
            if not math.isnan(index_cluster):
                clusters[index_cluster].append(data_object)

        return clusters
def set_encoding(self, encoding)
Change clusters encoding to specified type (Index List, Object List, Labeling).
Definition: encoder.py:162
Enumeration of encoding types (index labeling, index list separation, object list separation)...
Definition: encoder.py:31
def get_encoding(self)
Returns current cluster representation.
Definition: encoder.py:136
def get_clusters(self)
Returns clusters that are represented in line with type that is defined by get_encoding().
Definition: encoder.py:144
Provides service to change clustering result representation.
Definition: encoder.py:47
def get_data(self)
Returns data that was used for cluster analysis.
Definition: encoder.py:154
def __init__(self, encoding, clusters, data)
Constructor of clustering result representor.
Definition: encoder.py:118