3 @brief Module for representing clustering results.
5 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @copyright BSD-3-Clause
13 from enum
import IntEnum
18 @brief Enumeration of encoding types (index labeling, index list separation, object list separation).
23 CLUSTER_INDEX_LABELING = 0
26 CLUSTER_INDEX_LIST_SEPARATION = 1
29 CLUSTER_OBJECT_LIST_SEPARATION = 2
34 @brief Provides service to change clustering result representation.
35 @details There are three general types of representation:
36 1. Index List Separation that is defined by `CLUSTER_INDEX_LIST_SEPARATION`, for example `[[0, 1, 2], [3, 4], [5, 6, 7]]`.
37 2. Index Labeling that is defined by `CLUSTER_INDEX_LABELING`, for example `[0, 0, 0, 1, 1, 2, 2, 2]`.
38 3. Object List Separation that is defined by `CLUSTER_OBJECT_LIST_SEPARATION`, for example `[[obj1, obj2, obj3], [obj4, obj5], [obj6, obj7, obj8]]`.
40 Here is an example of how to convert the default Index List Separation to other types:
42 from pyclustering.utils import read_sample
43 from pyclustering.samples.definitions import SIMPLE_SAMPLES
45 from pyclustering.cluster.encoder import type_encoding, cluster_encoder
46 from pyclustering.cluster.kmeans import kmeans
48 # load list of points for cluster analysis
49 sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)
51 # create instance of K-Means algorithm
52 kmeans_instance = kmeans(sample, [[3.0, 5.1], [6.5, 8.6]])
54 # run cluster analysis and obtain results
55 kmeans_instance.process()
56 clusters = kmeans_instance.get_clusters()
57 print("Index List Separation:", clusters)
59 # by default k-means returns representation CLUSTER_INDEX_LIST_SEPARATION
60 type_repr = kmeans_instance.get_cluster_encoding()
61 encoder = cluster_encoder(type_repr, clusters, sample)
63 # change representation from index list to label list
64 encoder.set_encoding(type_encoding.CLUSTER_INDEX_LABELING)
65 print("Index Labeling:", encoder.get_clusters())
67 # change representation from label to object list
68 encoder.set_encoding(type_encoding.CLUSTER_OBJECT_LIST_SEPARATION)
69 print("Object List Separation:", encoder.get_clusters())
72 The output of the code above is the following:
74 Index List Separation: [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]
75 Index Labeling: [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
76 Object List Separation: [[[3.522979, 5.487981], [3.768699, 5.364477], [3.423602, 5.4199], [3.803905, 5.389491], [3.93669, 5.663041]], [[6.968136, 7.755556], [6.750795, 7.269541], [6.593196, 7.850364], [6.978178, 7.60985], [6.554487, 7.498119]]]
79 If an index or object that exists in the input data is not present in any cluster, then it is marked as `NaN` in
80 the case of Index Labeling. Here is an example:
82 from pyclustering.cluster.encoder import type_encoding, cluster_encoder
85 sample = [[1.0, 1.2], [1.2, 2.3], [114.3, 54.1], [2.2, 1.4], [5.3, 1.3]]
87 # Clusters do not contain the object with index 2 ([114.3, 54.1]) because it is an outlier.
88 clusters = [[0, 1], [3, 4]]
90 encoder = cluster_encoder(type_encoding.CLUSTER_INDEX_LIST_SEPARATION, clusters, sample)
91 encoder.set_encoding(type_encoding.CLUSTER_INDEX_LABELING)
93 print("Index Labeling:", encoder.get_clusters())
96 Here is the output of the code above. Pay attention to the `NaN` value for the object with index 2 `[114.3, 54.1]`.
98 Index Labeling: [0, 0, nan, 1, 1]
105 @brief Constructor of clustering result representor.
107 @param[in] encoding (type_encoding): Type of clusters representation (Index List, Object List or Labels).
108 @param[in] clusters (list): Clusters that were allocated from an input data.
109 @param[in] data (list): Data that was used for cluster analysis.
123 @brief Returns current cluster representation.
131 @brief Returns clusters that are represented in line with type that is defined by `get_encoding()`.
141 @brief Returns data that was used for cluster analysis.
149 @brief Change clusters encoding to specified type (Index List, Object List, Labeling).
151 @param[in] encoding (type_encoding): New type of clusters representation.
153 @return (cluster_encoder) Return itself.
161 if encoding == type_encoding.CLUSTER_INDEX_LIST_SEPARATION:
168 if encoding == type_encoding.CLUSTER_INDEX_LABELING:
175 if encoding == type_encoding.CLUSTER_INDEX_LABELING:
def __convert_index_to_label(self):
    """!
    @brief Converts clusters from Index List Separation to Index Labeling.
    @details Each object of the stored data receives the index of the cluster that lists it. Objects that are
              not listed in any cluster keep the `NaN` label.

    @return (list) Labels where the i-th element is the cluster index of the i-th object, or `NaN`.

    """
    # Start from 'NaN' so that objects missing from every cluster stay marked as unassigned.
    labels = [float('NaN')] * len(self.__data)

    for index_cluster, cluster in enumerate(self.__clusters):
        for index_object in cluster:
            labels[index_object] = index_cluster

    return labels
def __convert_index_to_object(self):
    """!
    @brief Converts clusters from Index List Separation to Object List Separation.
    @details Every object index inside each cluster is replaced by the corresponding object of the stored data.
              The order of clusters and the order of objects inside each cluster are preserved.

    @return (list) Clusters where each cluster is a list of data objects.

    """
    return [[self.__data[index_object] for index_object in cluster] for cluster in self.__clusters]
def __convert_object_to_label(self):
    """!
    @brief Converts clusters from Object List Separation to Index Labeling.
    @details Each clustered object is located in the stored data and the found position receives the index of
              the cluster. Equal objects are matched to successive positions of the data: the search for a
              repeated object continues right after the position found for its previous occurrence. Objects
              of the data that are not listed in any cluster keep the `NaN` label.

    @return (list) Labels where the i-th element is the cluster index of the i-th object, or `NaN`.

    """
    # Last matched position of every object, keyed by its string form because objects may be unhashable lists.
    positions = {}
    labels = [float('NaN')] * len(self.__data)

    for index_cluster, cluster in enumerate(self.__clusters):
        for data_object in cluster:
            hashable_data_object = str(data_object)

            # An object that has not been seen yet is searched from the beginning (-1 + 1 == 0).
            search_from = positions.get(hashable_data_object, -1) + 1
            index_object = self.__data.index(data_object, search_from)

            labels[index_object] = index_cluster
            positions[hashable_data_object] = index_object

    return labels
def __convert_object_to_index(self):
    """!
    @brief Converts clusters from Object List Separation to Index List Separation.
    @details Each clustered object is replaced by its position in the stored data. Equal objects are matched to
              successive positions of the data: the search for a repeated object continues right after the
              position found for its previous occurrence.

    @return (list) Clusters where each cluster is a list of object indexes.

    """
    # Last matched position of every object, keyed by its string form because objects may be unhashable lists.
    positions = {}
    clusters = []

    for cluster in self.__clusters:
        indexes = []
        for data_object in cluster:
            hashable_data_object = str(data_object)

            # An object that has not been seen yet is searched from the beginning (-1 + 1 == 0).
            search_from = positions.get(hashable_data_object, -1) + 1
            index_object = self.__data.index(data_object, search_from)

            indexes.append(index_object)
            positions[hashable_data_object] = index_object

        clusters.append(indexes)

    return clusters
def __convert_label_to_index(self):
    """!
    @brief Converts clusters from Index Labeling to Index List Separation.
    @details The amount of clusters is defined by the biggest label. `NaN` labels are ignored, both when the
              amount of clusters is calculated and when objects are distributed between clusters, therefore
              labels that start with `NaN` or consist only of `NaN` are handled without an exception.

    @return (list) Clusters where each cluster is a list of object indexes.

    """
    # 'max()' over a sequence whose first element is NaN returns NaN, so NaN labels are filtered out first.
    assigned_labels = [label for label in self.__clusters if not math.isnan(label)]
    amount_clusters = max(assigned_labels) + 1 if assigned_labels else 0

    clusters = [[] for _ in range(amount_clusters)]

    for index_object in range(len(self.__data)):
        index_cluster = self.__clusters[index_object]
        if not math.isnan(index_cluster):
            clusters[index_cluster].append(index_object)

    return clusters
def __convert_label_to_object(self):
    """!
    @brief Converts clusters from Index Labeling to Object List Separation.
    @details The amount of clusters is defined by the biggest label. `NaN` labels are ignored, both when the
              amount of clusters is calculated and when objects are distributed between clusters, therefore
              labels that start with `NaN` or consist only of `NaN` are handled without an exception.

    @return (list) Clusters where each cluster is a list of data objects.

    """
    # 'max()' over a sequence whose first element is NaN returns NaN, so NaN labels are filtered out first.
    assigned_labels = [label for label in self.__clusters if not math.isnan(label)]
    amount_clusters = max(assigned_labels) + 1 if assigned_labels else 0

    clusters = [[] for _ in range(amount_clusters)]

    for index_object in range(len(self.__data)):
        index_cluster = self.__clusters[index_object]
        if not math.isnan(index_cluster):
            clusters[index_cluster].append(self.__data[index_object])

    return clusters