clique.py
1 """!
2 
3 @brief Cluster analysis algorithm: CLIQUE
4 @details Implementation based on paper @cite article::clique::1.
5 
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2019
8 @copyright GNU Public License
9 
10 @cond GNU_PUBLIC_LICENSE
11  PyClustering is free software: you can redistribute it and/or modify
12  it under the terms of the GNU General Public License as published by
13  the Free Software Foundation, either version 3 of the License, or
14  (at your option) any later version.
15 
16  PyClustering is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  GNU General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23 @endcond
24 
25 """
26 
27 
28 import itertools
29 
30 from pyclustering.cluster import cluster_visualizer
31 from pyclustering.cluster.encoder import type_encoding
32 
33 from pyclustering.core.wrapper import ccore_library
34 
35 import pyclustering.core.clique_wrapper as wrapper
36 
37 
38 try:
39  import matplotlib
40  import matplotlib.gridspec as gridspec
41  import matplotlib.pyplot as plt
42  import matplotlib.patches as patches
43  import matplotlib.animation as animation
44 except Exception as error_instance:
45  import warnings
46  warnings.warn("Impossible to import matplotlib (please, install 'matplotlib'), pyclustering's visualization "
47  "functionality is not available (details: '%s')." % str(error_instance))
48 
49 
class clique_visualizer:
    """!
    @brief Visualizer of CLIQUE algorithm's results.
    @details CLIQUE visualizer provides visualization services that are specific for CLIQUE algorithm, for example,
              to display grid and its density.

    """

    # Alpha (opacity) used for the face of the densest cell; other cells are scaled proportionally.
    __maximum_density_alpha = 0.6

    @staticmethod
    def show_grid(cells, data):
        """!
        @brief Show CLIQUE blocks as a grid in data space.
        @details Each block contains points and according to this density is displayed. One canvas is created for
                  every pair of dimensions (a single canvas is used for one-dimensional data).

        @param[in] cells (list): List of cells that is produced by CLIQUE algorithm (must not be empty).
        @param[in] data (array_like): Input data that was used for clustering process.

        """
        dimension = cells[0].dimensions

        amount_canvases = 1
        if dimension > 1:
            amount_canvases = int(dimension * (dimension - 1) / 2)

        figure = plt.figure()
        grid_spec = gridspec.GridSpec(1, amount_canvases)

        pairs = list(itertools.combinations(range(dimension), 2))
        if not pairs:
            # One-dimensional data: the single axis is paired with itself.
            pairs = [(0, 0)]

        for index in range(amount_canvases):
            ax = figure.add_subplot(grid_spec[index])
            clique_visualizer.__draw_cells(ax, cells, pairs[index])
            clique_visualizer.__draw_two_dimension_data(ax, data, pairs[index])

        plt.show()

    @staticmethod
    def show_clusters(data, clusters, noise=None):
        """!
        @brief Display CLIQUE clustering results.

        @param[in] data (list): Data that was used for clustering.
        @param[in] clusters (array_like): Clusters that were allocated by the algorithm.
        @param[in] noise (array_like): Noise that were allocated by the algorithm (None means no noise).

        """
        # Explicit None check: truth-testing a multi-element array_like (e.g. numpy array) raises.
        noise_points = noise if noise is not None else []

        visualizer = cluster_visualizer()
        visualizer.append_clusters(clusters, data)
        visualizer.append_cluster(noise_points, data, marker='x')
        visualizer.show()

    @staticmethod
    def __draw_two_dimension_data(ax, data, pair):
        """!
        @brief Display data in two-dimensional canvas.

        @param[in] ax (Axis): Canvas where data should be displayed.
        @param[in] data (list): Data points that should be displayed.
        @param[in] pair (tuple): Pair of dimension indexes.

        """
        ax.set_xlabel("x%d" % pair[0])
        ax.set_ylabel("x%d" % pair[1])

        if len(data) == 0:
            return

        if len(data[0]) > 1:
            for point in data:
                ax.plot(point[pair[0]], point[pair[1]], color='red', marker='.')
        else:
            # One-dimensional data is drawn on the line y = 0, so y tick labels carry no information.
            for point in data:
                ax.plot(point[pair[0]], 0, color='red', marker='.')
            ax.yaxis.set_ticklabels([])

    @staticmethod
    def __draw_cells(ax, cells, pair):
        """!
        @brief Draw every CLIQUE block on the canvas, shading each one by its relative density.

        @param[in] ax (Axis): Canvas where blocks should be displayed.
        @param[in] cells (list): CLIQUE blocks to draw.
        @param[in] pair (tuple): Pair of dimension indexes that defines the projection.

        """
        ax.grid(False)

        # The most populated block defines full intensity.
        density_scale = max(len(cell.points) for cell in cells)
        for cell in cells:
            clique_visualizer.__draw_cell(ax, pair, cell, density_scale)

    @staticmethod
    def __draw_cell(ax, pair, cell, density_scale):
        """!
        @brief Draw a single CLIQUE block as a rectangle.

        @param[in] ax (Axis): Canvas where the block should be displayed.
        @param[in] pair (tuple): Pair of dimension indexes that defines the projection.
        @param[in] cell (clique_block): Block to draw.
        @param[in] density_scale (uint): Amount of points in the densest block (0 when all blocks are empty).

        """
        max_corner, min_corner = clique_visualizer.__get_rectangle_description(cell, pair)

        amount_points = len(cell.points)
        belong_cluster = amount_points > 0

        face_alpha = density_scale
        if density_scale != 0.0:
            face_alpha = clique_visualizer.__maximum_density_alpha * amount_points / density_scale

        face_color = matplotlib.colors.to_rgba('blue', alpha=face_alpha)
        edge_color = matplotlib.colors.to_rgba('black', alpha=1.0)

        width = max_corner[0] - min_corner[0]
        height = max_corner[1] - min_corner[1]
        rect = patches.Rectangle(min_corner, width, height,
                                 fill=belong_cluster,
                                 facecolor=face_color,
                                 edgecolor=edge_color,
                                 linewidth=0.5)
        ax.add_patch(rect)

    @staticmethod
    def __get_rectangle_description(cell, pair):
        """!
        @brief Project block corners to the two dimensions selected by the pair.

        @param[in] cell (clique_block): Block whose spatial location is projected.
        @param[in] pair (tuple): Pair of dimension indexes; (0, 0) denotes one-dimensional data.

        @return (list, list) Projected maximum and minimum corners.

        """
        max_corner, min_corner = cell.spatial_location.get_corners()

        max_corner = [max_corner[pair[0]], max_corner[pair[1]]]
        min_corner = [min_corner[pair[0]], min_corner[pair[1]]]

        if pair == (0, 0):
            # One-dimensional data has no second axis: use a fixed band around y = 0.
            max_corner[1], min_corner[1] = 1.0, -1.0

        return max_corner, min_corner
169 
170 
171 
class spatial_block:
    """!
    @brief Geometrical description of CLIQUE block in data space.
    @details Provides services related to spatial functionality.

    @see bang_block

    """

    def __init__(self, max_corner, min_corner):
        """!
        @brief Creates spatial block in data space.

        @param[in] max_corner (array_like): Maximum corner coordinates of the block.
        @param[in] min_corner (array_like): Minimal corner coordinates of the block.

        """
        self.__max_corner = max_corner
        self.__min_corner = min_corner

    def __str__(self):
        """!
        @brief Returns string block description.

        @return String representation of the block.

        """
        return "(max: %s; min: %s)" % (self.__max_corner, self.__min_corner)

    def __contains__(self, point):
        """!
        @brief Point is considered as contained if it lies in block (belong to it).
        @details Both borders are inclusive: a coordinate equal to the minimum or the maximum corner is inside.

        @param[in] point (array_like): Coordinates of the point to check.

        @return (bool) True if point is in block, otherwise False.

        """
        lower, upper = self.__min_corner, self.__max_corner
        is_outside = any(point[i] < lower[i] or point[i] > upper[i] for i in range(len(point)))
        return not is_outside

    def get_corners(self):
        """!
        @brief Return spatial description of current block.

        @return (tuple) Pair of maximum and minimum corners (max_corner, min_corner).

        """
        return self.__max_corner, self.__min_corner
225 
226 
227 
class clique_block:
    """!
    @brief CLIQUE block contains information about its logical location in grid, spatial location in data space and
            points that are covered by the block.

    """

    def __init__(self, logical_location=None, spatial_location=None, points=None, visited=False):
        """!
        @brief Initializes CLIQUE block.

        @param[in] logical_location (list): Logical location of the block in CLIQUE grid.
        @param[in] spatial_location (spatial_block): Spatial location in data space.
        @param[in] points (array_like): Points that belong to this block (can be obtained by method 'capture_points',
                    this parameter is used by CLIQUE in case of processing by C++ implementation when clustering
                    result are passed back to Python code).
        @param[in] visited (bool): Marks if block is visited during clustering process.

        """
        self.__logical_location = logical_location or []
        self.__spatial_location = spatial_location
        self.__points = points or []
        self.__visited = visited

    def __str__(self):
        """!
        @brief Returns string representation of the block using its logical location in CLIQUE grid.

        """
        return str(self.__logical_location)

    def __repr__(self):
        """!
        @brief Returns string representation of the block using its logical location in CLIQUE grid.

        """
        return str(self.__logical_location)

    @property
    def logical_location(self):
        """!
        @brief Logical location is represented by coordinates in CLIQUE grid, for example, in case of 2x2 grid blocks
                may have following coordinates: [0, 0], [0, 1], [1, 0], [1, 1].
        @return (list) Logical location of the block in CLIQUE grid.

        """
        return self.__logical_location

    @logical_location.setter
    def logical_location(self, location):
        """!
        @brief Assign logical location to CLIQUE block.

        @param[in] location (list): New logical location of the block in CLIQUE grid.

        """
        self.__logical_location = location

    @property
    def spatial_location(self):
        """!
        @brief Spatial location is represented by real data space coordinates.
        @return (spatial_block) Spatial block that describes location in data space.

        """
        return self.__spatial_location

    @spatial_location.setter
    def spatial_location(self, location):
        """!
        @brief Assign spatial location to CLIQUE block.

        @param[in] location (spatial_block): New spatial location of the block.

        """
        self.__spatial_location = location

    @property
    def dimensions(self):
        """!
        @brief Amount of dimensions where CLIQUE block is located.
        @return (uint) Amount of dimensions where CLIQUE block is located.

        """
        return len(self.__logical_location)

    @property
    def points(self):
        """!
        @brief Points that belong to the CLIQUE block.
        @details Points are represented by indexes that correspond to points in input data space.

        @return (array_like) Points that belong to the CLIQUE block.

        @see capture_points

        """
        return self.__points

    @property
    def visited(self):
        """!
        @brief Defines whether block is visited during cluster analysis.
        @details If cluster analysis has not been performed then value will be False.

        @return (bool) True if block has been visited during processing, False otherwise.

        """
        return self.__visited

    @visited.setter
    def visited(self, visited):
        """!
        @brief Marks or unmarks block as a visited.
        @details This setter is used by CLIQUE algorithm.

        @param[in] visited (bool): New visited state for the CLIQUE block.

        """
        self.__visited = visited

    def capture_points(self, data, point_availability):
        """!
        @brief Finds points that belong to this block using availability map to reduce computational complexity by
                checking whether the point belongs to the block.
        @details Every point is examined once. A captured point is appended to the block and marked as unavailable
                  in 'point_availability', so the map is modified in place.

        @param[in] data (array_like): Data where points are represented as coordinates.
        @param[in,out] point_availability (array_like): Contains boolean values that denote whether point is still
                        free (True) or already belongs to another CLIQUE block (False).

        """
        for index_point in range(len(data)):
            # Plain truthiness (instead of 'is True') also accepts numpy booleans and 1/0 flags.
            if point_availability[index_point] and (data[index_point] in self.__spatial_location):
                self.__points.append(index_point)
                point_availability[index_point] = False

    def get_location_neighbors(self, edge):
        """!
        @brief Forms list of logical location of each neighbor for this particular CLIQUE block.
        @details Neighbors differ from the block by exactly one step along one dimension; locations outside of
                  the grid [0, edge) are skipped. For each dimension the '+1' neighbor precedes the '-1' neighbor.

        @param[in] edge (uint): Amount of intervals in each dimension that is used for clustering process.

        @return (list) Logical location of each neighbor for this particular CLIQUE block.

        """
        neighbors = []

        for index_dimension, coordinate in enumerate(self.__logical_location):
            if coordinate + 1 < edge:
                position = self.__logical_location[:]
                position[index_dimension] += 1
                neighbors.append(position)

            if coordinate - 1 >= 0:
                position = self.__logical_location[:]
                position[index_dimension] -= 1
                neighbors.append(position)

        return neighbors
390 
391 
392 
class coordinate_iterator:
    """!
    @brief Coordinate iterator is used to generate logical location description for each CLIQUE block.
    @details This class is used by CLIQUE algorithm for clustering process.

    """

    def __init__(self, dimension, intervals):
        """!
        @brief Initializes coordinate iterator for CLIQUE algorithm.

        @param[in] dimension (uint): Amount of dimensions in input data space.
        @param[in] intervals (uint): Amount of intervals in each dimension.

        """
        self.__intervals = intervals
        self.__dimension = dimension
        self.__coordinate = [0] * dimension

    def get_coordinate(self):
        """!
        @brief Returns current block coordinate.
        @details The iterator's own list is returned (not a copy) and 'increment' modifies it in place, so callers
                  that need a stable value must copy it. None is returned when all coordinates have been visited.

        @return (list) Current logical coordinate or None if iteration is finished.

        """
        return self.__coordinate

    def increment(self):
        """!
        @brief Forms logical location for next block.
        @details The coordinate is advanced like a counter whose lowest digit is dimension 0. When every dimension
                  overflows, the list is reset to zeros in place and the iterator starts to report None.

        """
        for index_dimension in range(self.__dimension):
            if self.__coordinate[index_dimension] + 1 < self.__intervals:
                self.__coordinate[index_dimension] += 1
                return

            # Overflow in this dimension: reset it and carry to the next one.
            self.__coordinate[index_dimension] = 0

        self.__coordinate = None
434 
435 
436 
class clique:
    """!
    @brief Class implements CLIQUE grid based clustering algorithm.
    @details CLIQUE automatically finds subspaces with high-density clusters. It produces identical results
              irrespective of the order in which the input records are presented and it does not presume any canonical
              distribution for input data @cite article::clique::1.

    Here is an example where data in two-dimensional space is clustered using CLIQUE algorithm:
    @code
        from pyclustering.cluster.clique import clique, clique_visualizer
        from pyclustering.utils import read_sample
        from pyclustering.samples.definitions import FCPS_SAMPLES

        # read two-dimensional input data 'Target'
        data = read_sample(FCPS_SAMPLES.SAMPLE_TARGET)

        # create CLIQUE algorithm for processing
        intervals = 10  # defines amount of cells in grid in each dimension
        threshold = 0   # lets consider each point as non-outlier
        clique_instance = clique(data, intervals, threshold)

        # start clustering process and obtain results
        clique_instance.process()
        clusters = clique_instance.get_clusters()  # allocated clusters
        noise = clique_instance.get_noise()     # points that are considered as outliers (in this example should be empty)
        cells = clique_instance.get_cells()     # CLIQUE blocks that forms grid

        print("Amount of clusters:", len(clusters))

        # visualize clustering results
        clique_visualizer.show_grid(cells, data)    # show grid that has been formed by the algorithm
        clique_visualizer.show_clusters(data, clusters, noise)  # show clustering results
    @endcode

    In this example 6 clusters are allocated including four small cluster where each such small cluster consists of
    three points. There are visualized clustering results - grid that has been formed by CLIQUE algorithm with
    density and clusters itself:
    @image html clique_clustering_target.png "Fig. 1. CLIQUE clustering results (grid and clusters itself)."

    Sometimes such small clusters should be considered as outliers taking into account fact that two clusters in the
    central are relatively huge. To treat them as a noise threshold value should be increased:
    @code
        intervals = 10
        threshold = 3   # block that contains 3 or less points is considered as a outlier as well as its points
        clique_instance = clique(data, intervals, threshold)
    @endcode

    Two clusters are allocated, but in this case some points in cluster-"circle" are also considered as outliers,
    because CLIQUE operates with blocks, not with points:
    @image html clique_clustering_with_noise.png "Fig. 2. Noise allocation by CLIQUE."

    """

    def __init__(self, data, amount_intervals, density_threshold, **kwargs):
        """!
        @brief Create CLIQUE clustering algorithm.

        @param[in] data (list): Input data (list of points) that should be clustered.
        @param[in] amount_intervals (uint): Amount of intervals in each dimension that defines amount of CLIQUE block
                    as N_blocks = intervals^dimensions.
        @param[in] density_threshold (uint): Minimum number of points that should contain CLIQUE block to consider its
                    points as non-outliers.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'ccore').

        <b>Keyword Args:</b><br>
            - ccore (bool): By default is True. If True then C++ implementation is used for cluster analysis, otherwise
               Python implementation is used.

        @exception ValueError If data is empty, amount of intervals is not positive or threshold is negative.

        """
        self.__data = data
        self.__amount_intervals = amount_intervals
        self.__density_threshold = density_threshold

        # The C++ core is used only when requested and when the library is usable on this platform.
        self.__ccore = kwargs.get('ccore', True)
        if self.__ccore:
            self.__ccore = ccore_library.workable()

        self.__clusters = []
        self.__noise = []

        self.__cells = []
        self.__cells_map = {}

        self.__validate_arguments()

    def process(self):
        """!
        @brief Performs clustering process in line with rules of CLIQUE clustering algorithm.

        @return (clique) Returns itself (CLIQUE instance).

        @see get_clusters()
        @see get_noise()
        @see get_cells()

        """
        if self.__ccore:
            self.__process_by_ccore()
        else:
            self.__process_by_python()

        return self

    def get_clusters(self):
        """!
        @brief Returns allocated clusters.

        @remark Allocated clusters are returned only after data processing (method process()). Otherwise empty list is returned.

        @return (list) List of allocated clusters, each cluster contains indexes of objects in list of data.

        @see process()
        @see get_noise()

        """
        return self.__clusters

    def get_noise(self):
        """!
        @brief Returns allocated noise.

        @remark Allocated noise is returned only after data processing (method process()). Otherwise empty list is returned.

        @return (list) List of indexes that are marked as a noise.

        @see process()
        @see get_clusters()

        """
        return self.__noise

    def get_cells(self):
        """!
        @brief Returns CLIQUE blocks that are formed during clustering process.
        @details CLIQUE blocks can be used for visualization purposes. Each CLIQUE block contain its logical location
                  in grid, spatial location in data space and points that belong to block.

        @return (list) List of CLIQUE blocks.

        """
        return self.__cells

    def get_cluster_encoding(self):
        """!
        @brief Returns clustering result representation type that indicate how clusters are encoded.

        @return (type_encoding) Clustering result representation.

        @see get_clusters()

        """
        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION

    def __process_by_ccore(self):
        """!
        @brief Performs cluster analysis using C++ implementation of CLIQUE algorithm that is used by default if
                user's target platform is supported.

        """
        (self.__clusters, self.__noise, block_logical_locations, block_max_corners, block_min_corners, block_points) = \
            wrapper.clique(self.__data, self.__amount_intervals, self.__density_threshold)

        # Start from an empty list so that repeated process() calls do not accumulate blocks.
        self.__cells = []
        for index_cell in range(len(block_logical_locations)):
            location = spatial_block(block_max_corners[index_cell], block_min_corners[index_cell])
            self.__cells.append(clique_block(block_logical_locations[index_cell],
                                             location,
                                             block_points[index_cell],
                                             True))

    def __process_by_python(self):
        """!
        @brief Performs cluster analysis using Python implementation of CLIQUE algorithm.

        """
        # Drop results of a previous run so that repeated process() calls do not duplicate them.
        self.__clusters = []
        self.__noise = []

        self.__create_grid()
        self.__allocate_clusters()

        # The location map is needed only while clusters are being expanded.
        self.__cells_map.clear()

    def __validate_arguments(self):
        """!
        @brief Check input arguments of CLIQUE algorithm and if one of them is not correct then appropriate exception
                is thrown.

        @exception ValueError If data is empty, amount of intervals is not positive or threshold is negative.

        """
        if len(self.__data) == 0:
            raise ValueError("Empty input data. Data should contain at least one point.")

        if self.__amount_intervals <= 0:
            raise ValueError("Incorrect amount of intervals '%d'. Amount of intervals value should be greater than 0." % self.__amount_intervals)

        if self.__density_threshold < 0:
            raise ValueError("Incorrect density threshold '%f'. Density threshold should not be negative." % self.__density_threshold)

    def __allocate_clusters(self):
        """!
        @brief Performs cluster analysis using formed CLIQUE blocks.

        """
        for cell in self.__cells:
            if cell.visited is False:
                self.__expand_cluster(cell)

    def __expand_cluster(self, cell):
        """!
        @brief Tries to expand cluster from specified cell.
        @details During expanding points are marked as noise or append to new cluster. A block is dense when it
                  contains more points than the density threshold.

        @param[in] cell (clique_block): CLIQUE block from that cluster should be expanded.

        """
        cell.visited = True

        if len(cell.points) <= self.__density_threshold:
            if len(cell.points) > 0:
                self.__noise.extend(cell.points)

            return

        cluster = cell.points[:]
        neighbors = self.__get_neighbors(cell)

        # Breadth-first traversal: the list grows while it is iterated, so blocks appended here are processed too.
        for neighbor in neighbors:
            if len(neighbor.points) > self.__density_threshold:
                cluster.extend(neighbor.points)
                neighbors += self.__get_neighbors(neighbor)

            elif len(neighbor.points) > 0:
                self.__noise.extend(neighbor.points)

        self.__clusters.append(cluster)

    def __get_neighbors(self, cell):
        """!
        @brief Returns neighbors for specified CLIQUE block as clique_block objects.
        @details Only blocks that were not visited before are returned and each of them is marked as visited.

        @param[in] cell (clique_block): CLIQUE block whose neighbors are requested.

        @return (list) Neighbors as clique_block objects.

        """
        neighbors = []

        for location in cell.get_location_neighbors(self.__amount_intervals):
            candidate_neighbor = self.__cells_map[self.__location_to_key(location)]

            if not candidate_neighbor.visited:
                candidate_neighbor.visited = True
                neighbors.append(candidate_neighbor)

        return neighbors

    def __create_grid(self):
        """!
        @brief Creates CLIQUE grid that consists of CLIQUE blocks for clustering process.
        @details Every block captures the still unassigned points that lie inside it, therefore a point located on
                  a border between blocks belongs to the block that is created first.

        """
        data_sizes, min_corner, max_corner = self.__get_data_size_derscription()
        dimension = len(self.__data[0])

        cell_sizes = [dimension_length / self.__amount_intervals for dimension_length in data_sizes]

        self.__cells = [clique_block() for _ in range(pow(self.__amount_intervals, dimension))]
        iterator = coordinate_iterator(dimension, self.__amount_intervals)

        point_availability = [True] * len(self.__data)
        self.__cells_map = {}
        for cell in self.__cells:
            # NOTE(review): get_coordinate() returns the iterator's live list and increment() changes it in place,
            # so the location used below is the one *after* the increment (all zeros for the last block). All
            # locations are still covered exactly once; the order is kept as is because it defines which block
            # captures points that lie on block borders.
            logical_location = iterator.get_coordinate()
            iterator.increment()

            cell.logical_location = logical_location[:]

            cur_max_corner, cur_min_corner = self.__get_spatial_location(logical_location, min_corner, max_corner, cell_sizes)
            cell.spatial_location = spatial_block(cur_max_corner, cur_min_corner)

            cell.capture_points(self.__data, point_availability)

            self.__cells_map[self.__location_to_key(logical_location)] = cell

    def __location_to_key(self, location):
        """!
        @brief Forms key using logical location of a CLIQUE block.
        @details Each coordinate is followed by a dot, for example, [1, 2] is encoded as '1.2.'.

        @param[in] location (list): Logical location of a CLIQUE block.

        @return (string) Key for CLIQUE block map.

        """
        return ''.join(str(e) + '.' for e in location)

    def __get_spatial_location(self, logical_location, min_corner, max_corner, cell_sizes):
        """!
        @brief Calculates spatial location for CLIQUE block with logical coordinates defined by logical_location.

        @param[in] logical_location (list): Logical location of CLIQUE block for that spatial location should be calculated.
        @param[in] min_corner (list): Minimum corner of an input data.
        @param[in] max_corner (list): Maximum corner of an input data.
        @param[in] cell_sizes (list): Size of CLIQUE block in each dimension.

        @return (list, list): Maximum and minimum corners for the specified CLIQUE block.

        """
        cur_min_corner = min_corner[:]
        cur_max_corner = min_corner[:]
        last_interval = self.__amount_intervals - 1

        for index_dimension in range(len(self.__data[0])):
            cur_min_corner[index_dimension] += cell_sizes[index_dimension] * logical_location[index_dimension]

            if logical_location[index_dimension] == last_interval:
                # Snap the last block to the data border to avoid losing border points to rounding errors.
                cur_max_corner[index_dimension] = max_corner[index_dimension]
            else:
                cur_max_corner[index_dimension] = cur_min_corner[index_dimension] + cell_sizes[index_dimension]

        return cur_max_corner, cur_min_corner

    def __get_data_size_derscription(self):
        """!
        @brief Calculates input data description that is required to create CLIQUE grid.

        @return (list, list, list): Data size in each dimension, minimum and maximum corners.

        """
        # list() always creates independent copies; slicing a numpy row would return views of the input data, so
        # updating the corners would overwrite the first point.
        min_corner = list(self.__data[0])
        max_corner = list(self.__data[0])

        dimension = len(self.__data[0])

        for index_point in range(1, len(self.__data)):
            for index_dimension in range(dimension):
                coordinate = self.__data[index_point][index_dimension]
                if coordinate > max_corner[index_dimension]:
                    max_corner[index_dimension] = coordinate

                if coordinate < min_corner[index_dimension]:
                    min_corner[index_dimension] = coordinate

        data_sizes = [max_corner[index_dimension] - min_corner[index_dimension] for index_dimension in range(dimension)]

        return data_sizes, min_corner, max_corner
Common visualizer of clusters on 1D, 2D or 3D surface.
Definition: __init__.py:359
pyclustering module for cluster analysis.
Definition: __init__.py:1
Geometrical description of CLIQUE block in data space.
Definition: clique.py:172
def spatial_location(self)
Spatial location is represented by real data space coordinates.
Definition: clique.py:287
def points(self)
Points that belong to the CLIQUE block.
Definition: clique.py:315
def __get_data_size_derscription(self)
Calculates input data description that is required to create CLIQUE grid.
Definition: clique.py:769
def __str__(self)
Returns string block description.
Definition: clique.py:193
Class implements CLIQUE grid based clustering algorithm.
Definition: clique.py:437
def __validate_arguments(self)
Check input arguments of CLIQUE algorithm and if one of them is not correct then appropriate exceptio...
Definition: clique.py:626
def capture_points(self, data, point_availability)
Finds points that belong to this block using availability map to reduce computational complexity by c...
Definition: clique.py:350
def get_cells(self)
Returns CLIQUE blocks that are formed during clustering process.
Definition: clique.py:573
def __init__(self, data, amount_intervals, density_threshold, **kwargs)
Create CLIQUE clustering algorithm.
Definition: clique.py:490
Visualizer of CLIQUE algorithm's results.
Definition: clique.py:50
def __process_by_ccore(self)
Performs cluster analysis using C++ implementation of CLIQUE algorithm that is used by default if use...
Definition: clique.py:598
def process(self)
Performs clustering process in line with rules of CLIQUE clustering algorithm.
Definition: clique.py:523
Module for representing clustering results.
Definition: encoder.py:1
def show_grid(cells, data)
Show CLIQUE blocks as a grid in data space.
Definition: clique.py:61
def logical_location(self)
Logical location is represented by coordinates in CLIQUE grid, for example, in case of 2x2 grid block...
Definition: clique.py:267
def __get_spatial_location(self, logical_location, min_corner, max_corner, cell_sizes)
Calculates spatial location for CLIQUE block with logical coordinates defined by logical_location.
Definition: clique.py:743
def __allocate_clusters(self)
Performs cluster analysis using formed CLIQUE blocks.
Definition: clique.py:643
def show_clusters(data, clusters, noise=None)
Display CLIQUE clustering results.
Definition: clique.py:92
def __process_by_python(self)
Performs cluster analysis using Python implementation of CLIQUE algorithm.
Definition: clique.py:615
def __expand_cluster(self, cell)
Tries to expand cluster from specified cell.
Definition: clique.py:653
CLIQUE block contains information about its logical location in grid, spatial location in data space ...
Definition: clique.py:228
def increment(self)
Forms logical location for next block.
Definition: clique.py:421
def __repr__(self)
Returns string representation of the block using its logical location in CLIQUE grid.
Definition: clique.py:259
def get_clusters(self)
Returns allocated clusters.
Definition: clique.py:543
def dimensions(self)
Amount of dimensions where CLIQUE block is located.
Definition: clique.py:306
def get_noise(self)
Returns allocated noise.
Definition: clique.py:558
def __get_neighbors(self, cell)
Returns neighbors for specified CLIQUE block as clique_block objects.
Definition: clique.py:683
def get_location_neighbors(self, edge)
Forms list of logical location of each neighbor for this particular CLIQUE block. ...
Definition: clique.py:367
def __init__(self, logical_location=None, spatial_location=None, points=None, visited=False)
Initializes CLIQUE block.
Definition: clique.py:235
def visited(self)
Defines whether block is visited during cluster analysis.
Definition: clique.py:328
def __location_to_key(self, location)
Forms key using logical location of a CLIQUE block.
Definition: clique.py:733
def __str__(self)
Returns string representation of the block using its logical location in CLIQUE grid.
Definition: clique.py:252
def __create_grid(self)
Creates CLIQUE grid that consists of CLIQUE blocks for clustering process.
Definition: clique.py:704
def __contains__(self, point)
Point is considered as contained if it lies in block (belong to it).
Definition: clique.py:203
def get_corners(self)
Return spatial description of current block.
Definition: clique.py:217
def __init__(self, dimension, intervals)
Initializes coordinate iterator for CLIQUE algorithm.
Definition: clique.py:400
def get_coordinate(self)
Returns current block coordinate.
Definition: clique.py:413
def __init__(self, max_corner, min_corner)
Creates spatial block in data space.
Definition: clique.py:181
def get_cluster_encoding(self)
Returns clustering result representation type that indicate how clusters are encoded.
Definition: clique.py:585
Coordinate iterator is used to generate logical location description for each CLIQUE block...
Definition: clique.py:393