pyclustering  0.10.1
pyclustering is a Python, C++ data mining library.
encoder.py
1 """!
2 
3 @brief Module for representing clustering results.
4 
5 @authors Andrei Novikov (pyclustering@yandex.ru)
6 @date 2014-2020
7 @copyright BSD-3-Clause
8 
9 """
10 
11 import math
12 
13 from enum import IntEnum
14 
15 
class type_encoding(IntEnum):
    """!
    @brief Enumeration of the supported clustering result representations (encodings).
    @details Three representations are distinguished:
              - index labeling: one cluster label per object, for example `[0, 0, 0, 1, 1, 2, 2, 2]`;
              - index list separation: one list of object indexes per cluster, for example `[[0, 1, 2], [3, 4], [5, 6, 7]]`;
              - object list separation: one list of objects per cluster.

    """

    ## Every object is described by the label (index) of the cluster it belongs to.
    CLUSTER_INDEX_LABELING = 0

    ## Every cluster is described by the list of indexes of its objects.
    CLUSTER_INDEX_LIST_SEPARATION = 1

    ## Every cluster is described by the list of its objects themselves.
    CLUSTER_OBJECT_LIST_SEPARATION = 2
30 
31 
33  """!
34  @brief Provides service to change clustering result representation.
35  @details There are three general types of representation:
36  1. Index List Separation that is defined by `CLUSTER_INDEX_LIST_SEPARATION`, for example `[[0, 1, 2], [3, 4], [5, 6, 7]]`.
37  2. Index Labeling that is defined by `CLUSTER_INDEX_LABELING`, for example `[0, 0, 0, 1, 1, 2, 2, 2]`.
38  3. Object List Separation that is defined by `CLUSTER_OBJECT_LIST_SEPARATION`, for example `[[obj1, obj2, obj3], [obj4, obj5], [obj6, obj7, obj8]]`.
39 
40  Here is an example of how to convert the default Index List Separation to other types:
41  @code
42  from pyclustering.utils import read_sample
43  from pyclustering.samples.definitions import SIMPLE_SAMPLES
44 
45  from pyclustering.cluster.encoder import type_encoding, cluster_encoder
46  from pyclustering.cluster.kmeans import kmeans
47 
48  # load list of points for cluster analysis
49  sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)
50 
51  # create instance of K-Means algorithm
52  kmeans_instance = kmeans(sample, [[3.0, 5.1], [6.5, 8.6]])
53 
54  # run cluster analysis and obtain results
55  kmeans_instance.process()
56  clusters = kmeans_instance.get_clusters()
57  print("Index List Separation:", clusters)
58 
59  # by default k-means returns representation CLUSTER_INDEX_LIST_SEPARATION
60  type_repr = kmeans_instance.get_cluster_encoding()
61  encoder = cluster_encoder(type_repr, clusters, sample)
62 
63  # change representation from index list to label list
64  encoder.set_encoding(type_encoding.CLUSTER_INDEX_LABELING)
65  print("Index Labeling:", encoder.get_clusters())
66 
67  # change representation from label to object list
68  encoder.set_encoding(type_encoding.CLUSTER_OBJECT_LIST_SEPARATION)
69  print("Object List Separation:", encoder.get_clusters())
70  @endcode
71 
72  Output of the code above is following:
73  @code
74  Index List Separation: [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]
75  Index Labeling: [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
76  Object List Separation: [[[3.522979, 5.487981], [3.768699, 5.364477], [3.423602, 5.4199], [3.803905, 5.389491], [3.93669, 5.663041]], [[6.968136, 7.755556], [6.750795, 7.269541], [6.593196, 7.850364], [6.978178, 7.60985], [6.554487, 7.498119]]]
77  @endcode
78 
79  If an object from the input data does not belong to any cluster, it is marked as `NaN` in
80  case of Index Labeling. Here is an example:
81  @code
82  from pyclustering.cluster.encoder import type_encoding, cluster_encoder
83 
84  # An input data.
85  sample = [[1.0, 1.2], [1.2, 2.3], [114.3, 54.1], [2.2, 1.4], [5.3, 1.3]]
86 
87  # Clusters do not contain the object with index 2 ([114.3, 54.1]) because it is an outlier.
88  clusters = [[0, 1], [3, 4]]
89 
90  encoder = cluster_encoder(type_encoding.CLUSTER_INDEX_LIST_SEPARATION, clusters, sample)
91  encoder.set_encoding(type_encoding.CLUSTER_INDEX_LABELING)
92 
93  print("Index Labeling:", encoder.get_clusters())
94  @endcode
95 
96  Here is an output of the code above. Pay attention to `NaN` value for the object with index 2 `[114.3, 54.1]`.
97  @code
98  Index Labeling: [0, 0, nan, 1, 1]
99  @endcode
100 
101  """
102 
103  def __init__(self, encoding, clusters, data):
104  """!
105  @brief Constructor of clustering result representor.
106 
107  @param[in] encoding (type_encoding): Type of clusters representation (Index List, Object List or Labels).
108  @param[in] clusters (list): Clusters that were allocated from an input data.
109  @param[in] data (list): Data that was used for cluster analysis.
110 
111  @see type_encoding
112 
113  """
114 
115  self.__type_representation = encoding
116  self.__clusters = clusters
117  self.__data = data
118 
119 
120  @property
121  def get_encoding(self):
122  """!
123  @brief Returns current cluster representation.
124 
125  """
126  return self.__type_representation
127 
128 
129  def get_clusters(self):
130  """!
131  @brief Returns clusters that are represented in line with type that is defined by `get_encoding()`.
132 
133  @see get_encoding()
134 
135  """
136  return self.__clusters
137 
138 
139  def get_data(self):
140  """!
141  @brief Returns data that was used for cluster analysis.
142 
143  """
144  return self.__data
145 
146 
147  def set_encoding(self, encoding):
148  """!
149  @brief Change clusters encoding to specified type (Index List, Object List, Labeling).
150 
151  @param[in] encoding (type_encoding): New type of clusters representation.
152 
153  @return (cluster_encoder) Return itself.
154 
155  """
156 
157  if encoding == self.__type_representation:
158  return self
159 
160  if self.__type_representation == type_encoding.CLUSTER_INDEX_LABELING:
161  if encoding == type_encoding.CLUSTER_INDEX_LIST_SEPARATION:
163 
164  else:
166 
167  elif self.__type_representation == type_encoding.CLUSTER_INDEX_LIST_SEPARATION:
168  if encoding == type_encoding.CLUSTER_INDEX_LABELING:
170 
171  else:
173 
174  else:
175  if encoding == type_encoding.CLUSTER_INDEX_LABELING:
177 
178  else:
180 
181  self.__type_representation = encoding
182  return self
183 
184 
185  def __convert_index_to_label(self):
186  clusters = [float('NaN')] * len(self.__data)
187  index_cluster = 0
188 
189  for cluster in self.__clusters:
190  for index_object in cluster:
191  clusters[index_object] = index_cluster
192 
193  index_cluster += 1
194 
195  return clusters
196 
197 
198  def __convert_index_to_object(self):
199  clusters = [ [] for _ in range(len(self.__clusters)) ]
200  for index_cluster in range(len(self.__clusters)):
201  for index_object in self.__clusters[index_cluster]:
202  data_object = self.__data[index_object]
203  clusters[index_cluster].append(data_object)
204 
205  return clusters
206 
207 
208  def __convert_object_to_label(self):
209  positions = dict()
210  clusters = [float('NaN')] * len(self.__data)
211  index_cluster = 0
212 
213  for cluster in self.__clusters:
214  for data_object in cluster:
215  hashable_data_object = str(data_object)
216  if hashable_data_object in positions:
217  index_object = self.__data.index(data_object, positions[hashable_data_object] + 1)
218  else:
219  index_object = self.__data.index(data_object)
220 
221  clusters[index_object] = index_cluster
222  positions[hashable_data_object] = index_object
223 
224  index_cluster += 1
225 
226  return clusters
227 
228 
229  def __convert_object_to_index(self):
230  positions = dict()
231  clusters = [[] for _ in range(len(self.__clusters))]
232  for index_cluster in range(len(self.__clusters)):
233  for data_object in self.__clusters[index_cluster]:
234  hashable_data_object = str(data_object)
235  if hashable_data_object in positions:
236  index_object = self.__data.index(data_object, positions[hashable_data_object] + 1)
237  else:
238  index_object = self.__data.index(data_object)
239 
240  clusters[index_cluster].append(index_object)
241  positions[hashable_data_object] = index_object
242 
243  return clusters
244 
245 
246  def __convert_label_to_index(self):
247  clusters = [[] for _ in range(max(self.__clusters) + 1)]
248 
249  for index_object in range(len(self.__data)):
250  index_cluster = self.__clusters[index_object]
251  if not math.isnan(index_cluster):
252  clusters[index_cluster].append(index_object)
253 
254  return clusters
255 
256 
257  def __convert_label_to_object(self):
258  clusters = [[] for _ in range(max(self.__clusters) + 1)]
259 
260  for index_object in range(len(self.__data)):
261  index_cluster = self.__clusters[index_object]
262  if not math.isnan(index_cluster):
263  clusters[index_cluster].append(self.__data[index_object])
264 
265  return clusters
pyclustering.cluster.encoder.cluster_encoder.__clusters
__clusters
Definition: encoder.py:116
pyclustering.cluster.encoder.type_encoding
Enumeration of encoding types (index labeling, index list separation, object list separation).
Definition: encoder.py:16
pyclustering.cluster.encoder.cluster_encoder.__data
__data
Definition: encoder.py:117
pyclustering.cluster.encoder.cluster_encoder.set_encoding
def set_encoding(self, encoding)
Change clusters encoding to specified type (Index List, Object List, Labeling).
Definition: encoder.py:147
pyclustering.cluster.encoder.cluster_encoder.__type_representation
__type_representation
Definition: encoder.py:115
pyclustering.cluster.encoder.cluster_encoder.__convert_index_to_object
def __convert_index_to_object(self)
Definition: encoder.py:198
pyclustering.cluster.encoder.cluster_encoder.get_encoding
def get_encoding(self)
Returns current cluster representation.
Definition: encoder.py:121
pyclustering.cluster.encoder.cluster_encoder
Provides service to change clustering result representation.
Definition: encoder.py:32
pyclustering.cluster.encoder.cluster_encoder.get_data
def get_data(self)
Returns data that was used for cluster analysis.
Definition: encoder.py:139
pyclustering.cluster.encoder.cluster_encoder.get_clusters
def get_clusters(self)
Returns clusters that are represented in line with type that is defined by get_encoding().
Definition: encoder.py:129
pyclustering.cluster.encoder.cluster_encoder.__convert_object_to_index
def __convert_object_to_index(self)
Definition: encoder.py:229
pyclustering.cluster.encoder.cluster_encoder.__convert_label_to_index
def __convert_label_to_index(self)
Definition: encoder.py:246
pyclustering.cluster.encoder.cluster_encoder.__init__
def __init__(self, encoding, clusters, data)
Constructor of clustering result representor.
Definition: encoder.py:103
pyclustering.cluster.encoder.cluster_encoder.__convert_index_to_label
def __convert_index_to_label(self)
Definition: encoder.py:185
pyclustering.cluster.encoder.cluster_encoder.__convert_object_to_label
def __convert_object_to_label(self)
Definition: encoder.py:208
pyclustering.cluster.encoder.cluster_encoder.__convert_label_to_object
def __convert_label_to_object(self)
Definition: encoder.py:257