3 @brief Module for representing clustering results. 5 @authors Andrei Novikov (pyclustering@yandex.ru) 7 @copyright GNU Public License 9 @cond GNU_PUBLIC_LICENSE 10 PyClustering is free software: you can redistribute it and/or modify 11 it under the terms of the GNU General Public License as published by 12 the Free Software Foundation, either version 3 of the License, or 13 (at your option) any later version. 15 PyClustering is distributed in the hope that it will be useful, 16 but WITHOUT ANY WARRANTY; without even the implied warranty of 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 GNU General Public License for more details. 20 You should have received a copy of the GNU General Public License 21 along with this program. If not, see <http://www.gnu.org/licenses/>. 28 from enum
import IntEnum
33 @brief Enumeration of encoding types (index labeling, index list separation, object list separation). 38 CLUSTER_INDEX_LABELING = 0
41 CLUSTER_INDEX_LIST_SEPARATION = 1
44 CLUSTER_OBJECT_LIST_SEPARATION = 2
49 @brief Provides service to change clustering result representation. 50 @details There are three general types of representation: 51 1. Index List Separation that is defined by `CLUSTER_INDEX_LIST_SEPARATION`, for example `[[0, 1, 2], [3, 4], [5, 6, 7]]`. 52 2. Index Labeling that is defined by `CLUSTER_INDEX_LABELING`, for example `[0, 0, 0, 1, 1, 2, 2, 2]`. 53 3. Object List Separation that is defined by `CLUSTER_OBJECT_LIST_SEPARATION`, for example `[[obj1, obj2, obj3], [obj4, obj5], [obj6, obj7, obj8]]`. 55 Here is an example of how to convert the default Index List Separation to other types: 57 from pyclustering.utils import read_sample 58 from pyclustering.samples.definitions import SIMPLE_SAMPLES 60 from pyclustering.cluster.encoder import type_encoding, cluster_encoder 61 from pyclustering.cluster.kmeans import kmeans 63 # load list of points for cluster analysis 64 sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1) 66 # create instance of K-Means algorithm 67 kmeans_instance = kmeans(sample, [[3.0, 5.1], [6.5, 8.6]]) 69 # run cluster analysis and obtain results 70 kmeans_instance.process() 71 clusters = kmeans_instance.get_clusters() 72 print("Index List Separation:", clusters) 74 # by default k-means returns representation CLUSTER_INDEX_LIST_SEPARATION 75 type_repr = kmeans_instance.get_cluster_encoding() 76 encoder = cluster_encoder(type_repr, clusters, sample) 78 # change representation from index list to label list 79 encoder.set_encoding(type_encoding.CLUSTER_INDEX_LABELING) 80 print("Index Labeling:", encoder.get_clusters()) 82 # change representation from label to object list 83 encoder.set_encoding(type_encoding.CLUSTER_OBJECT_LIST_SEPARATION) 84 print("Object List Separation:", encoder.get_clusters()) 87 The output of the code above is the following: 89 Index List Separation: [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]] 90 Index Labeling: [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] 91 Object List Separation: [[[3.522979, 5.487981], [3.768699, 5.364477], [3.423602, 5.4199], [3.803905, 
5.389491], [3.93669, 5.663041]], [[6.968136, 7.755556], [6.750795, 7.269541], [6.593196, 7.850364], [6.978178, 7.60985], [6.554487, 7.498119]]] 94 If an index or object that exists in the input data is not present in any cluster, it is marked as `NaN` in 95 case of Index Labeling. Here is an example: 97 from pyclustering.cluster.encoder import type_encoding, cluster_encoder 100 sample = [[1.0, 1.2], [1.2, 2.3], [114.3, 54.1], [2.2, 1.4], [5.3, 1.3]] 102 # Clusters do not contain the object with index 2 ([114.3, 54.1]) because it is an outlier. 103 clusters = [[0, 1], [3, 4]] 105 encoder = cluster_encoder(type_encoding.CLUSTER_INDEX_LIST_SEPARATION, clusters, sample) 106 encoder.set_encoding(type_encoding.CLUSTER_INDEX_LABELING) 108 print("Index Labeling:", encoder.get_clusters()) 111 Here is the output of the code above. Pay attention to `NaN` value for the object with index 2 `[114.3, 54.1]`. 113 Index Labeling: [0, 0, nan, 1, 1] 120 @brief Constructor of clustering result representor. 122 @param[in] encoding (type_encoding): Type of clusters representation (Index List, Object List or Labels). 123 @param[in] clusters (list): Clusters that were allocated from an input data. 124 @param[in] data (list): Data that was used for cluster analysis. 138 @brief Returns current cluster representation. 146 @brief Returns clusters that are represented in line with type that is defined by `get_encoding()`. 156 @brief Returns data that was used for cluster analysis. 164 @brief Change clusters encoding to specified type (Index List, Object List, Labeling). 166 @param[in] encoding (type_encoding): New type of clusters representation. 168 @return (cluster_encoder) Return itself. 176 if encoding == type_encoding.CLUSTER_INDEX_LIST_SEPARATION:
183 if encoding == type_encoding.CLUSTER_INDEX_LABELING:
190 if encoding == type_encoding.CLUSTER_INDEX_LABELING:
200 def __convert_index_to_label(self):
201 clusters = [float(
'NaN')] * len(self.
__data)
205 for index_object
in cluster:
206 clusters[index_object] = index_cluster
213 def __convert_index_to_object(self):
214 clusters = [ []
for _
in range(len(self.
__clusters)) ]
215 for index_cluster
in range(len(self.
__clusters)):
216 for index_object
in self.
__clusters[index_cluster]:
217 data_object = self.
__data[index_object]
218 clusters[index_cluster].append(data_object)
223 def __convert_object_to_label(self):
225 clusters = [float(
'NaN')] * len(self.
__data)
229 for data_object
in cluster:
230 hashable_data_object = str(data_object)
231 if hashable_data_object
in positions:
232 index_object = self.
__data.index(data_object, positions[hashable_data_object] + 1)
234 index_object = self.
__data.index(data_object)
236 clusters[index_object] = index_cluster
237 positions[hashable_data_object] = index_object
244 def __convert_object_to_index(self):
246 clusters = [[]
for _
in range(len(self.
__clusters))]
247 for index_cluster
in range(len(self.
__clusters)):
248 for data_object
in self.
__clusters[index_cluster]:
249 hashable_data_object = str(data_object)
250 if hashable_data_object
in positions:
251 index_object = self.
__data.index(data_object, positions[hashable_data_object] + 1)
253 index_object = self.
__data.index(data_object)
255 clusters[index_cluster].append(index_object)
256 positions[hashable_data_object] = index_object
261 def __convert_label_to_index(self):
262 clusters = [[]
for _
in range(max(self.
__clusters) + 1)]
264 for index_object
in range(len(self.
__data)):
266 if not math.isnan(index_cluster):
267 clusters[index_cluster].append(index_object)
272 def __convert_label_to_object(self):
273 clusters = [[]
for _
in range(max(self.
__clusters) + 1)]
275 for index_object
in range(len(self.
__data)):
277 if not math.isnan(index_cluster):
278 clusters[index_cluster].append(self.
__data[index_object])
def __convert_index_to_label(self)
def set_encoding(self, encoding)
Change clusters encoding to specified type (Index List, Object List, Labeling).
def __convert_index_to_object(self)
Enumeration of encoding types (index labeling, index list separation, object list separation)...
def get_encoding(self)
Returns current cluster representation.
def __convert_label_to_index(self)
def get_clusters(self)
Returns clusters that are represented in line with type that is defined by get_encoding().
Provides service to change clustering result representation.
def get_data(self)
Returns data that was used for cluster analysis.
def __convert_label_to_object(self)
def __convert_object_to_index(self)
def __init__(self, encoding, clusters, data)
Constructor of clustering result representor.
def __convert_object_to_label(self)