encoder.py
1 """!
2 
3 @brief Module for representing clustering results.
4 
5 @authors Andrei Novikov (pyclustering@yandex.ru)
6 @date 2014-2019
7 @copyright GNU Public License
8 
9 @cond GNU_PUBLIC_LICENSE
10  PyClustering is free software: you can redistribute it and/or modify
11  it under the terms of the GNU General Public License as published by
12  the Free Software Foundation, either version 3 of the License, or
13  (at your option) any later version.
14 
15  PyClustering is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  GNU General Public License for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with this program. If not, see <http://www.gnu.org/licenses/>.
22 @endcond
23 
24 """
25 
26 
27 from enum import IntEnum
28 
29 
class type_encoding(IntEnum):
    """!
    @brief Enumeration of encoding types (index labeling, index list separation, object list separation).

    @details The integer values are part of the public contract (the enumeration derives from IntEnum, so
              members compare equal to plain integers) and must not be renumbered.

    """

    ## Clusters are stored as one flat list of labels: element 'i' is the index of the cluster that
    ## contains the i-th object of the data, for example [0, 0, 1, 1].
    CLUSTER_INDEX_LABELING = 0

    ## Clusters are stored as a list of clusters, where each cluster is a list of indexes of objects
    ## in the data, for example [ [0, 1], [2, 3] ].
    CLUSTER_INDEX_LIST_SEPARATION = 1

    ## Clusters are stored as a list of clusters, where each cluster is a list of the data objects
    ## themselves, for example [ [obj0, obj1], [obj2, obj3] ].
    CLUSTER_OBJECT_LIST_SEPARATION = 2
44 
45 
class cluster_encoder:
    """!
    @brief Provides service to change clustering result representation.

    Example:
    @code
        # load list of points for cluster analysis
        sample = read_sample(path)

        # create instance of K-Means algorithm
        kmeans_instance = kmeans(sample, [ [0.0, 0.1], [2.5, 2.6] ])

        # run cluster analysis and obtain results
        kmeans_instance.process()
        clusters = kmeans_instance.get_clusters()

        # by default k-means returns representation CLUSTER_INDEX_LIST_SEPARATION
        type_repr = kmeans_instance.get_cluster_encoding()
        encoder = cluster_encoder(type_repr, clusters, sample)

        # change representation from index list to label list
        encoder.set_encoding(type_encoding.CLUSTER_INDEX_LABELING)

        # change representation from label to object list
        encoder.set_encoding(type_encoding.CLUSTER_OBJECT_LIST_SEPARATION)
    @endcode
    """

    def __init__(self, encoding, clusters, data):
        """!
        @brief Constructor of clustering result representor.

        @param[in] encoding (type_encoding): Type of clusters representation (index list, object list or labels).
        @param[in] clusters (list): Current clusters representation.
        @param[in] data (list): Data that corresponds to clusters.

        """

        self.__type_representation = encoding
        self.__clusters = clusters
        self.__data = data


    @property
    def get_encoding(self):
        """!
        @brief Returns current cluster representation.

        @details This member is declared as a property, therefore it is read without call parentheses:
                  'encoder.get_encoding'.

        @return (type_encoding) Type of representation in which clusters are currently stored.

        """
        return self.__type_representation


    def get_clusters(self):
        """!
        @brief Returns clusters representation.

        @return (list) Clusters in the current encoding (the stored object itself, not a copy).

        """
        return self.__clusters


    def get_data(self):
        """!
        @brief Returns data that corresponds to clusters.

        @return (list) Data that was passed to the constructor (the stored object itself, not a copy).

        """
        return self.__data


    def set_encoding(self, encoding):
        """!
        @brief Change clusters encoding to specified type (index list, object list, labeling).

        @details Stored clusters are converted in place from the current representation to the requested one.
                  When the requested encoding is already the current one nothing is converted.

        @param[in] encoding (type_encoding): New type of clusters representation.

        @return (cluster_encoder) Return itself (also when no conversion is required, so calls can be chained).

        """

        current_encoding = self.__type_representation

        # Nothing to convert - still return the encoder itself to honour the documented contract.
        if encoding == current_encoding:
            return self

        if current_encoding == type_encoding.CLUSTER_INDEX_LABELING:
            if encoding == type_encoding.CLUSTER_INDEX_LIST_SEPARATION:
                self.__clusters = self.__convert_label_to_index()
            else:
                self.__clusters = self.__convert_label_to_object()

        elif current_encoding == type_encoding.CLUSTER_INDEX_LIST_SEPARATION:
            if encoding == type_encoding.CLUSTER_INDEX_LABELING:
                self.__clusters = self.__convert_index_to_label()
            else:
                self.__clusters = self.__convert_index_to_object()

        else:
            # Current representation is the object list separation.
            if encoding == type_encoding.CLUSTER_INDEX_LABELING:
                self.__clusters = self.__convert_object_to_label()
            else:
                self.__clusters = self.__convert_object_to_index()

        self.__type_representation = encoding
        return self


    def __convert_index_to_label(self):
        """!
        @brief Converts list of index clusters to flat list of labels.

        @details Objects that are not mentioned in any cluster keep the initial label 0.

        @return (list) Labels, one per object of the data.

        """
        labels = [0] * len(self.__data)

        for index_cluster, cluster in enumerate(self.__clusters):
            for index_object in cluster:
                labels[index_object] = index_cluster

        return labels


    def __convert_index_to_object(self):
        """!
        @brief Converts list of index clusters to list of object clusters.

        @return (list) Clusters where each index is replaced by the corresponding object of the data.

        """
        return [[self.__data[index_object] for index_object in cluster] for cluster in self.__clusters]


    def __find_object_index(self, data_object, positions):
        """!
        @brief Finds index of the next not yet consumed occurrence of the object in the data.

        @details Equal objects may occur several times in the data, therefore the position of the last match
                  is remembered (keyed by string form of the object, because objects such as lists are not
                  hashable) and the following search for an equal object starts right after it.
                  The search relies on 'index' of the data container, which raises ValueError for a list
                  when the object is not found.

        @param[in] data_object (any): Object from a cluster that should be located in the data.
        @param[in,out] positions (dict): Last found index for each object string form, updated by the call.

        @return (int) Index of the object in the data.

        """
        key = str(data_object)
        search_from = positions.get(key, -1) + 1

        index_object = self.__data.index(data_object, search_from)
        positions[key] = index_object

        return index_object


    def __convert_object_to_label(self):
        """!
        @brief Converts list of object clusters to flat list of labels.

        @details Objects of the data that are not present in any cluster keep the initial label 0.

        @return (list) Labels, one per object of the data.

        """
        positions = {}
        labels = [0] * len(self.__data)

        for index_cluster, cluster in enumerate(self.__clusters):
            for data_object in cluster:
                labels[self.__find_object_index(data_object, positions)] = index_cluster

        return labels


    def __convert_object_to_index(self):
        """!
        @brief Converts list of object clusters to list of index clusters.

        @return (list) Clusters where each object is replaced by its index in the data.

        """
        positions = {}
        clusters = []

        for cluster in self.__clusters:
            clusters.append([self.__find_object_index(data_object, positions) for data_object in cluster])

        return clusters


    def __allocate_clusters_for_labels(self):
        """!
        @brief Creates empty clusters for every label from 0 to the biggest stored label.

        @return (list) List of empty lists; it is empty when there are no labels at all.

        """
        # 'default' keeps an empty labeling valid instead of failing inside max().
        amount_clusters = max(self.__clusters, default=-1) + 1
        return [[] for _ in range(amount_clusters)]


    def __convert_label_to_index(self):
        """!
        @brief Converts flat list of labels to list of index clusters.

        @return (list) Clusters where each cluster is a list of indexes of objects in the data.

        """
        clusters = self.__allocate_clusters_for_labels()

        for index_object in range(len(self.__data)):
            clusters[self.__clusters[index_object]].append(index_object)

        return clusters


    def __convert_label_to_object(self):
        """!
        @brief Converts flat list of labels to list of object clusters.

        @return (list) Clusters where each cluster is a list of objects from the data.

        """
        clusters = self.__allocate_clusters_for_labels()

        for index_object, data_object in enumerate(self.__data):
            clusters[self.__clusters[index_object]].append(data_object)

        return clusters
def set_encoding(self, encoding)
Change clusters encoding to specified type (index list, object list, labeling).
Definition: encoder.py:114
Enumeration of encoding types (index labeling, index list separation, object list separation)...
Definition: encoder.py:30
def get_encoding(self)
Returns current cluster representation.
Definition: encoder.py:90
def get_clusters(self)
Returns clusters representation.
Definition: encoder.py:98
Provides service to change clustering result representation.
Definition: encoder.py:46
def get_data(self)
Returns data that corresponds to clusters.
Definition: encoder.py:106
def __init__(self, encoding, clusters, data)
Constructor of clustering result representor.
Definition: encoder.py:74