encoder.py
1 """!
2 
3 @brief Module for representing clustering results.
4 
5 @authors Andrei Novikov (pyclustering@yandex.ru)
6 @date 2014-2019
7 @copyright GNU Public License
8 
9 @cond GNU_PUBLIC_LICENSE
10  PyClustering is free software: you can redistribute it and/or modify
11  it under the terms of the GNU General Public License as published by
12  the Free Software Foundation, either version 3 of the License, or
13  (at your option) any later version.
14 
15  PyClustering is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  GNU General Public License for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with this program. If not, see <http://www.gnu.org/licenses/>.
22 @endcond
23 
24 """
25 
26 
27 from enum import IntEnum;
28 
29 
class type_encoding(IntEnum):
    """!
    @brief Kinds of clustering result representation.

    @details Members are integer-valued (`IntEnum`), so they compare equal to the plain
             integers 0, 1 and 2.

    """

    # Labeling: one cluster index per data object.
    CLUSTER_INDEX_LABELING = 0

    # Index list separation: one list of object indexes per cluster.
    CLUSTER_INDEX_LIST_SEPARATION = 1

    # Object list separation: one list of data objects per cluster.
    CLUSTER_OBJECT_LIST_SEPARATION = 2
44 
45 
47  """!
48  @brief Provides service to change clustering result representation.
49 
50  Example:
51  @code
52  # load list of points for cluster analysis
53  sample = read_sample(path);
54 
55  # create instance of K-Means algorithm
56  kmeans_instance = kmeans(sample, [ [0.0, 0.1], [2.5, 2.6] ]);
57 
58  # run cluster analysis and obtain results
59  kmeans_instance.process();
60  clusters = kmeans_instance.get_clusters();
61 
62  # by default k-means returns representation CLUSTER_INDEX_LIST_SEPARATION
63  type_repr = kmeans_instance.get_cluster_encoding();
64  encoder = cluster_encoder(type_repr, clusters, sample);
65 
66  # change representation from index list to label list
67  encoder.set_encoding(type_encoding.CLUSTER_INDEX_LABELING);
68 
69  # change representation from label to object list
70  encoder.set_encoding(type_encoding.CLUSTER_OBJECT_LIST_SEPARATION);
71  @endcode
72  """
73 
74  def __init__(self, encoding, clusters, data):
75  """!
76  @brief Constructor of clustering result representor.
77 
78  @param[in] encoding (type_encoding): Type of clusters representation (index list, object list or labels).
79  @param[in] clusters (list): Current clusters representation.
80  @param[in] data (list): Data that corresponds to clusters.
81 
82  """
83 
84  self.__type_representation = encoding;
85  self.__clusters = clusters;
86  self.__data = data;
87 
88 
89  @property
90  def get_encoding(self):
91  """!
92  @brief Returns current cluster representation.
93 
94  """
95  return self.__type_representation;
96 
97 
98  def get_clusters(self):
99  """!
100  @brief Returns clusters representation.
101 
102  """
103  return self.__clusters;
104 
105 
106  def get_data(self):
107  """!
108  @brief Returns data that corresponds to clusters.
109 
110  """
111  return self.__data;
112 
113 
114  def set_encoding(self, encoding):
115  """!
116  @brief Change clusters encoding to specified type (index list, object list, labeling).
117 
118  @param[in] encoding (type_encoding): New type of clusters representation.
119 
120  """
121 
122  if(encoding == self.__type_representation):
123  return;
124 
125  if (self.__type_representation == type_encoding.CLUSTER_INDEX_LABELING):
126  if (encoding == type_encoding.CLUSTER_INDEX_LIST_SEPARATION):
127  self.__clusters = self.__convert_label_to_index();
128 
129  else:
131 
132  elif (self.__type_representation == type_encoding.CLUSTER_INDEX_LIST_SEPARATION):
133  if (encoding == type_encoding.CLUSTER_INDEX_LABELING):
134  self.__clusters = self.__convert_index_to_label();
135 
136  else:
138 
139  else:
140  if (encoding == type_encoding.CLUSTER_INDEX_LABELING):
142 
143  else:
145 
146  self.__type_representation = encoding;
147 
148 
149  def __convert_index_to_label(self):
150  clusters = [0] * len(self.__data);
151  index_cluster = 0;
152 
153  for cluster in self.__clusters:
154  for index_object in cluster:
155  clusters[index_object] = index_cluster;
156 
157  index_cluster += 1;
158 
159  return clusters;
160 
161 
162  def __convert_index_to_object(self):
163  clusters = [ [] for _ in range(len(self.__clusters)) ];
164  for index_cluster in range(len(self.__clusters)):
165  for index_object in self.__clusters[index_cluster]:
166  data_object = self.__data[index_object];
167  clusters[index_cluster].append(data_object);
168 
169  return clusters;
170 
171 
172  def __convert_object_to_label(self):
173  positions = dict();
174  clusters = [0] * len(self.__data);
175  index_cluster = 0;
176 
177  for cluster in self.__clusters:
178  for data_object in cluster:
179  index_object = -1;
180  hashable_data_object = str(data_object);
181  if (hashable_data_object in positions):
182  index_object = self.__data.index(data_object, positions[hashable_data_object] + 1);
183  else:
184  index_object = self.__data.index(data_object);
185 
186  clusters[index_object] = index_cluster;
187  positions[hashable_data_object] = index_object;
188 
189  index_cluster += 1;
190 
191  return clusters;
192 
193 
194  def __convert_object_to_index(self):
195  positions = dict();
196  clusters = [ [] for _ in range(len(self.__clusters)) ];
197  for index_cluster in range(len(self.__clusters)):
198  for data_object in self.__clusters[index_cluster]:
199  index_object = -1;
200  hashable_data_object = str(data_object);
201  if (hashable_data_object in positions):
202  index_object = self.__data.index(data_object, positions[hashable_data_object] + 1);
203  else:
204  index_object = self.__data.index(data_object);
205 
206  clusters[index_cluster].append(index_object);
207  positions[hashable_data_object] = index_object;
208 
209  return clusters;
210 
211 
212  def __convert_label_to_index(self):
213  clusters = [ [] for _ in range(max(self.__clusters) + 1) ];
214 
215  for index_object in range(len(self.__data)):
216  index_cluster = self.__clusters[index_object];
217  clusters[index_cluster].append(index_object);
218 
219  return clusters;
220 
221 
222  def __convert_label_to_object(self):
223  clusters = [ [] for _ in range(max(self.__clusters) + 1) ];
224 
225  for index_object in range(len(self.__data)):
226  index_cluster = self.__clusters[index_object];
227  clusters[index_cluster].append(self.__data[index_object]);
228 
229  return clusters;
def set_encoding(self, encoding)
Change clusters encoding to specified type (index list, object list, labeling).
Definition: encoder.py:114
Enumeration of encoding types (index labeling, index list separation, object list separation)...
Definition: encoder.py:30
def get_encoding(self)
Returns current cluster representation.
Definition: encoder.py:90
def get_clusters(self)
Returns clusters representation.
Definition: encoder.py:98
Provides service to change clustering result representation.
Definition: encoder.py:46
def get_data(self)
Returns data that corresponds to clusters.
Definition: encoder.py:106
def __init__(self, encoding, clusters, data)
Constructor of clustering result representor.
Definition: encoder.py:74