fcm.py
1 """!
2 
3 @brief Cluster analysis algorithm: Fuzzy C-Means
4 @details Implementation based on paper @cite book::pattern_recognition_with_fuzzy.
5 
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2019
8 @copyright GNU Public License
9 
10 @cond GNU_PUBLIC_LICENSE
11  PyClustering is free software: you can redistribute it and/or modify
12  it under the terms of the GNU General Public License as published by
13  the Free Software Foundation, either version 3 of the License, or
14  (at your option) any later version.
15 
16  PyClustering is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  GNU General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23 @endcond
24 
25 """
26 
27 
28 import numpy
29 
30 import pyclustering.core.fcm_wrapper as wrapper
31 
32 from pyclustering.core.wrapper import ccore_library
33 
34 
35 class fcm:
36  """!
37  @brief Class represents Fuzzy C-means (FCM) clustering algorithm.
38  @details Fuzzy clustering is a form of clustering in which each data point can belong to more than one cluster.
39 
40  Fuzzy C-Means algorithm uses two general formulas for cluster analysis. The first is to updated membership of each
41  point:
42  \f[w_{ij}=\frac{1}{\sum_{k=0}^{c}\left ( \frac{\left \| x_{i}-c_{j} \right \|}{\left \| x_{i}-c_{k} \right \|} \right )^{\frac{2}{m-1}}}\f]
43 
44  The second formula is used to update centers in line with obtained centers:
45  \f[c_{k}=\frac{\sum_{i=0}^{N}w_{k}\left ( x_{i} \right )^{m}x_{i}}{\sum_{i=0}^{N}w_{k}\left ( x_{i} \right )^{m}}\f]
46 
47  Fuzzy C-Means clustering results depend on initial centers. Algorithm K-Means++ can used for center initialization
48  from module 'pyclustering.cluster.center_initializer'.
49 
50  CCORE implementation of the algorithm uses thread pool to parallelize the clustering process.
51 
52  Here is an example how to perform cluster analysis using Fuzzy C-Means algorithm:
53  @code
54  from pyclustering.samples.definitions import FAMOUS_SAMPLES
55  from pyclustering.cluster import cluster_visualizer
56  from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
57  from pyclustering.cluster.fcm import fcm
58  from pyclustering.utils import read_sample
59 
60  # load list of points for cluster analysis
61  sample = read_sample(FAMOUS_SAMPLES.SAMPLE_OLD_FAITHFUL)
62 
63  # initialize
64  initial_centers = kmeans_plusplus_initializer(sample, 2, kmeans_plusplus_initializer.FARTHEST_CENTER_CANDIDATE).initialize()
65 
66  # create instance of Fuzzy C-Means algorithm
67  fcm_instance = fcm(sample, initial_centers)
68 
69  # run cluster analysis and obtain results
70  fcm_instance.process()
71  clusters = fcm_instance.get_clusters()
72  centers = fcm_instance.get_centers()
73 
74  # visualize clustering results
75  visualizer = cluster_visualizer()
76  visualizer.append_clusters(clusters, sample)
77  visualizer.append_cluster(centers, marker='*', markersize=10)
78  visualizer.show()
79  @endcode
80 
81  The next example shows how to perform image segmentation using Fuzzy C-Means algorithm:
82  @code
83  from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
84  from pyclustering.cluster.fcm import fcm
85  from pyclustering.utils import read_image, draw_image_mask_segments
86 
87  # load list of points for cluster analysis
88  data = read_image("stpetersburg_admiral.jpg")
89 
90  # initialize
91  initial_centers = kmeans_plusplus_initializer(data, 3, kmeans_plusplus_initializer.FARTHEST_CENTER_CANDIDATE).initialize()
92 
93  # create instance of Fuzzy C-Means algorithm
94  fcm_instance = fcm(data, initial_centers)
95 
96  # run cluster analysis and obtain results
97  fcm_instance.process()
98  clusters = fcm_instance.get_clusters()
99 
100  # visualize segmentation results
101  draw_image_mask_segments("stpetersburg_admiral.jpg", clusters)
102  @endcode
103 
104  @image html fcm_segmentation_stpetersburg.png "Image segmentation using Fuzzy C-Means algorithm."
105 
106  """
107 
108  def __init__(self, data, initial_centers, **kwargs):
109  """!
110  @brief Initialize Fuzzy C-Means algorithm.
111 
112  @param[in] data (array_like): Input data that is presented as array of points (objects), each point should be represented by array_like data structure.
113  @param[in] initial_centers (array_like): Initial coordinates of centers of clusters that are represented by array_like data structure: [center1, center2, ...].
114  @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'tolerance', 'itermax', 'm').
115 
116  <b>Keyword Args:</b><br>
117  - ccore (bool): Defines should be CCORE library (C++ pyclustering library) used instead of Python code or not.
118  - tolerance (float): Stop condition: if maximum value of change of centers of clusters is less than tolerance then algorithm stops processing.
119  - itermax (uint): Maximum number of iterations that is used for clustering process (by default: 200).
120  - m (float): Hyper-parameter that controls how fuzzy the cluster will be. The higher it is, the fuzzier the cluster will be in the end.
121  This parameter should be greater than 1 (by default: 2).
122 
123  """
124 
125  self.__data = data
126  self.__clusters = []
127  self.__centers = initial_centers
128  self.__membership = []
129 
130  self.__tolerance = kwargs.get('tolerance', 0.001)
131  self.__itermax = kwargs.get('itermax', 200)
132  self.__m = kwargs.get('m', 2)
133 
134  self.__degree = 2.0 / (self.__m - 1)
135 
136  self.__ccore = kwargs.get('ccore', True)
137  if self.__ccore is True:
138  self.__ccore = ccore_library.workable()
139 
140  self.__verify_arguments()
141 
142 
143  def process(self):
144  """!
145  @brief Performs cluster analysis in line with Fuzzy C-Means algorithm.
146 
147  @return (fcm) Returns itself (Fuzzy C-Means instance).
148 
149  @see get_clusters()
150  @see get_centers()
151  @see get_membership()
152 
153  """
154  if self.__ccore is True:
155  self.__process_by_ccore()
156  else:
157  self.__process_by_python()
158 
159  return self
160 
161 
162  def get_clusters(self):
163  """!
164  @brief Returns allocated clusters that consists of points that most likely (in line with membership) belong to
165  these clusters.
166 
167  @remark Allocated clusters can be returned only after data processing (use method process()). Otherwise empty list is returned.
168 
169  @return (list) List of allocated clusters, each cluster contains indexes from input data.
170 
171  @see process()
172  @see get_centers()
173  @see get_membership()
174 
175  """
176  return self.__clusters
177 
178 
179  def get_centers(self):
180  """!
181  @brief Returns list of centers of allocated clusters.
182 
183  @return (array_like) Cluster centers.
184 
185  @see process()
186  @see get_clusters()
187  @see get_membership()
188 
189  """
190  return self.__centers
191 
192 
193  def get_membership(self):
194  """!
195  @brief Returns cluster membership (probability) for each point in data.
196 
197  @return (array_like) Membership for each point in format [[Px1(c1), Px1(c2), ...], [Px2(c1), Px2(c2), ...], ...],
198  where [Px1(c1), Px1(c2), ...] membership for point x1.
199 
200  @see process()
201  @see get_clusters()
202  @see get_centers()
203 
204  """
205  return self.__membership
206 
207 
208  def __process_by_ccore(self):
209  """!
210  @brief Performs cluster analysis using C/C++ implementation.
211 
212  """
213  result = wrapper.fcm_algorithm(self.__data, self.__centers, self.__m, self.__tolerance, self.__itermax)
214 
215  self.__clusters = result[wrapper.fcm_package_indexer.INDEX_CLUSTERS]
216  self.__centers = result[wrapper.fcm_package_indexer.INDEX_CENTERS]
217  self.__membership = result[wrapper.fcm_package_indexer.INDEX_MEMBERSHIP]
218 
219 
220  def __process_by_python(self):
221  """!
222  @brief Performs cluster analysis using Python implementation.
223 
224  """
225  self.__data = numpy.array(self.__data)
226  self.__centers = numpy.array(self.__centers)
227 
228  self.__membership = numpy.zeros((len(self.__data), len(self.__centers)))
229 
230  change = float('inf')
231  iteration = 0
232 
233  while change > self.__tolerance and iteration < self.__itermax:
234  self.__update_membership()
235  centers = self.__calculate_centers()
236  change = self.__calculate_changes(centers)
237 
238  self.__centers = centers
239  iteration += 1
240 
241  self.__extract_clusters()
242 
243 
244  def __calculate_centers(self):
245  """!
246  @brief Calculate center using membership of each cluster.
247 
248  @return (list) Updated clusters as list of clusters. Each cluster contains indexes of objects from data.
249 
250  @return (numpy.array) Updated centers.
251 
252  """
253  dimension = self.__data.shape[1]
254  centers = numpy.zeros((len(self.__centers), dimension))
255 
256  for i in range(len(self.__centers)):
257  # multiplication '@' requires python version 3.5
258  centers[i] = numpy.divide(self.__membership[:, i] @ self.__data, numpy.sum(self.__membership[:, i]))
259 
260  return centers
261 
262 
263  def __update_membership(self):
264  """!
265  @brief Update membership for each point in line with current cluster centers.
266 
267  """
268  data_difference = numpy.zeros((len(self.__centers), len(self.__data)))
269 
270  for i in range(len(self.__centers)):
271  data_difference[i] = numpy.sum(numpy.square(self.__data - self.__centers[i]), axis=1)
272 
273  for i in range(len(self.__data)):
274  for j in range(len(self.__centers)):
275  divider = sum([pow(data_difference[j][i] / data_difference[k][i], self.__degree) for k in range(len(self.__centers)) if data_difference[k][i] != 0.0])
276 
277  if divider != 0.0:
278  self.__membership[i][j] = 1.0 / divider
279  else:
280  self.__membership[i][j] = 1.0
281 
282 
283  def __calculate_changes(self, updated_centers):
284  """!
285  @brief Calculate changes between centers.
286 
287  @return (float) Maximum change between centers.
288 
289  """
290  changes = numpy.sum(numpy.square(self.__centers - updated_centers), axis=1).T
291  return numpy.max(changes)
292 
293 
294  def __extract_clusters(self):
295  self.__clusters = [[] for i in range(len(self.__centers))]
296  belongs = numpy.argmax(self.__membership, axis=1)
297 
298  for i in range(len(belongs)):
299  self.__clusters[belongs[i]].append(i)
300 
301 
302  def __verify_arguments(self):
303  """!
304  @brief Verify input parameters for the algorithm and throw exception in case of incorrectness.
305 
306  """
307  if len(self.__data) == 0:
308  raise ValueError("Input data is empty (size: '%d')." % len(self.__data))
309 
310  if len(self.__centers) == 0:
311  raise ValueError("Initial centers are empty (size: '%d')." % len(self.__centers))
def __update_membership(self)
Update membership for each point in line with current cluster centers.
Definition: fcm.py:263
def __extract_clusters(self)
Definition: fcm.py:294
def get_centers(self)
Returns list of centers of allocated clusters.
Definition: fcm.py:179
def process(self)
Performs cluster analysis in line with Fuzzy C-Means algorithm.
Definition: fcm.py:143
def __calculate_centers(self)
Calculate center using membership of each cluster.
Definition: fcm.py:244
def __calculate_changes(self, updated_centers)
Calculate changes between centers.
Definition: fcm.py:283
def __verify_arguments(self)
Verify input parameters for the algorithm and throw exception in case of incorrectness.
Definition: fcm.py:302
def get_clusters(self)
Returns allocated clusters that consists of points that most likely (in line with membership) belong ...
Definition: fcm.py:162
Class represents Fuzzy C-means (FCM) clustering algorithm.
Definition: fcm.py:35
def get_membership(self)
Returns cluster membership (probability) for each point in data.
Definition: fcm.py:193
def __init__(self, data, initial_centers, kwargs)
Initialize Fuzzy C-Means algorithm.
Definition: fcm.py:108
def __process_by_ccore(self)
Performs cluster analysis using C/C++ implementation.
Definition: fcm.py:208
def __process_by_python(self)
Performs cluster analysis using Python implementation.
Definition: fcm.py:220