optics.py
1 """!
2 
3 @brief Cluster analysis algorithm: OPTICS (Ordering Points To Identify Clustering Structure)
4 @details Implementation based on paper @cite article::optics::1.
5 
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2018
8 @copyright GNU Public License
9 
10 @cond GNU_PUBLIC_LICENSE
11  PyClustering is free software: you can redistribute it and/or modify
12  it under the terms of the GNU General Public License as published by
13  the Free Software Foundation, either version 3 of the License, or
14  (at your option) any later version.
15 
16  PyClustering is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  GNU General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23 @endcond
24 
25 """
26 
27 
28 import math
29 import warnings
30 
31 try:
32  import matplotlib.pyplot as plt
33 except Exception as error_instance:
34  warnings.warn("Impossible to import matplotlib (please, install 'matplotlib'), pyclustering's visualization "
35  "functionality is not available (details: '%s')." % str(error_instance))
36 
37 from pyclustering.container.kdtree import kdtree
38 
39 from pyclustering.cluster.encoder import type_encoding
40 
41 from pyclustering.utils.color import color as color_list
42 
43 from pyclustering.core.wrapper import ccore_library
44 
45 import pyclustering.core.optics_wrapper as wrapper
46 
47 
class ordering_visualizer:
    """!
    @brief Cluster ordering diagram visualizer that represents dataset graphically as density-based clustering structure.
    @details This OPTICS algorithm is KD-tree optimized.

    @see ordering_analyser

    """

    @staticmethod
    def show_ordering_diagram(analyser, amount_clusters = None):
        """!
        @brief Display cluster-ordering (reachability-plot) diagram.
        @details When 'amount_clusters' is specified, the connectivity radius is requested from the analyser, the
                  diagram is split at the returned borders and each part is drawn with its own color. If the analyser
                  cannot find a radius (it returns 'None'), the plain black diagram is displayed instead.

        @param[in] analyser (ordering_analyser): cluster-ordering analyser whose ordering diagram should be displayed.
        @param[in] amount_clusters (uint): if it is not 'None' then it displays connectivity radius line that can be
                    used for allocation of specified amount of clusters and colorizes diagram by corresponding
                    cluster colors.

        Example demonstrates general abilities of 'ordering_visualizer' class:
        @code
            # Display cluster-ordering diagram with connectivity radius that is used for allocation of three clusters.
            ordering_visualizer.show_ordering_diagram(analyser, 3)

            # Display cluster-ordering diagram without radius.
            ordering_visualizer.show_ordering_diagram(analyser)
        @endcode

        """
        ordering = analyser.cluster_ordering
        axis = plt.subplot(111)

        radius, borders = None, []
        if amount_clusters is not None:
            radius, borders = analyser.calculate_connvectivity_radius(amount_clusters)

        if radius is None:
            # Nothing to colorize: either no cluster amount was requested or the analyser
            # did not find a suitable radius, so the radius line cannot be drawn.
            axis.bar(range(0, len(ordering)), ordering[0:len(ordering)], width = 1.0, color = 'black')
            plt.xlim([0, len(ordering)])

        else:
            # divide into cluster groups to visualize by colors
            left_index_border = 0
            current_index_border = 0
            for index_border, right_index_border in enumerate(borders):
                axis.bar(range(left_index_border, right_index_border),
                         ordering[left_index_border:right_index_border],
                         width = 1.0, color = color_list.TITLES[index_border])
                left_index_border = right_index_border
                current_index_border = index_border

            # the tail after the last border uses the color following the last one used in the loop
            axis.bar(range(left_index_border, len(ordering)),
                     ordering[left_index_border:len(ordering)],
                     width = 1.0, color = color_list.TITLES[current_index_border + 1])

            plt.xlim([0, len(ordering)])

            plt.axhline(y = radius, linewidth = 2, color = 'black')
            plt.text(0, radius + radius * 0.03, " Radius: " + str(round(radius, 4)) + ";\n Clusters: " + str(amount_clusters), color = 'b', fontsize = 10)

        plt.show()
103 
104 
class ordering_analyser:
    """!
    @brief Analyser of cluster ordering diagram.
    @details Using cluster-ordering it is able to calculate connectivity radius for allocation of specified amount of
              clusters and to calculate amount of clusters using specified connectivity radius. Cluster-ordering is
              formed by OPTICS algorithm during cluster analysis.

    @see optics

    """

    @property
    def cluster_ordering(self):
        """!
        @brief (list) Returns values of dataset cluster ordering.

        """
        return self.__ordering


    def __init__(self, ordering_diagram):
        """!
        @brief Analyser of ordering diagram that is based on reachability-distances.

        @param[in] ordering_diagram (list): reachability-distances in cluster-ordering; the sequence is stored by
                    reference and is not copied.

        @see calculate_connvectivity_radius

        """
        self.__ordering = ordering_diagram


    def __len__(self):
        """!
        @brief Returns length of clustering-ordering diagram.

        """
        return len(self.__ordering)


    def calculate_connvectivity_radius(self, amount_clusters, maximum_iterations = 100):
        """!
        @brief Calculates connectivity radius of allocation specified amount of clusters using ordering diagram and marks borders of clusters using indexes of values of ordering diagram.
        @details The radius is searched by bisection between 0.0 and the maximum value of the diagram. Parameter
                  'maximum_iterations' is used to protect from hanging when it is impossible to allocate specified
                  number of clusters. An empty diagram yields '(None, [])'.

        @param[in] amount_clusters (uint): amount of clusters that should be allocated by calculated connectivity radius.
        @param[in] maximum_iterations (uint): maximum number of iteration for searching connectivity radius to allocated specified amount of clusters (by default it is restricted by 100 iterations).

        @return (double, list) Value of connectivity radius and borders of clusters like (radius, borders). Radius is
                 'None' if it hasn't been found for the specified amount of iterations; in that case borders are the
                 ones produced by the last examined radius.

        """

        # max() of an empty sequence raises ValueError - there is nothing to search in an empty diagram.
        if len(self.__ordering) == 0:
            return None, []

        maximum_distance = max(self.__ordering)

        upper_distance = maximum_distance
        lower_distance = 0.0

        result = None

        amount, borders = self.extract_cluster_amount(maximum_distance)
        if amount <= amount_clusters:
            for _ in range(maximum_iterations):
                radius = (lower_distance + upper_distance) / 2.0

                amount, borders = self.extract_cluster_amount(radius)
                if amount == amount_clusters:
                    result = radius
                    break

                if amount == 0:
                    break

                # smaller radius produces more clusters, therefore move the corresponding bound
                if amount > amount_clusters:
                    lower_distance = radius
                else:
                    upper_distance = radius

        return result, borders


    def extract_cluster_amount(self, radius):
        """!
        @brief Obtains amount of clustering that can be allocated by using specified radius for ordering diagram and borders between them.
        @details When growth of reachability-distances is detected then it is considered as a start point of cluster,
                  then peak is detected and after that recession is observed until new growth (that means end of the
                  current cluster and start of a new one) or end of diagram. An empty diagram yields '(0, [])'.

        @param[in] radius (double): connectivity radius that is used for cluster allocation.

        @return (uint, list) Amount of clusters that can be allocated by the connectivity radius on ordering diagram and borders between them using indexes
                 from ordering diagram (amount_clusters, border_clusters).

        """

        # Without any value the final 'previous_distance > radius' check would compare None with a number.
        if len(self.__ordering) == 0:
            return 0, []

        amount_clusters = 1

        cluster_start = False
        cluster_pick = False
        total_similarity = True
        previous_cluster_distance = None
        previous_distance = None

        cluster_borders = []

        for index_ordering, distance in enumerate(self.__ordering):
            if distance >= radius:
                if cluster_start is False:
                    # first value above the radius after a run of values below it: a new cluster begins
                    cluster_start = True
                    amount_clusters += 1

                    if index_ordering != 0:
                        cluster_borders.append(index_ordering)

                else:
                    # 'previous_cluster_distance' is always set here: this branch is reachable only
                    # after the branch above has been executed for a preceding value.
                    if (distance < previous_cluster_distance) and (cluster_pick is False):
                        cluster_pick = True

                    elif (distance > previous_cluster_distance) and (cluster_pick is True):
                        cluster_pick = False
                        amount_clusters += 1

                        if index_ordering != 0:
                            cluster_borders.append(index_ordering)

                previous_cluster_distance = distance

            else:
                cluster_start = False
                cluster_pick = False

            if (previous_distance is not None) and (distance != previous_distance):
                total_similarity = False

            previous_distance = distance

        # all values are equal and above the radius: nothing can be allocated
        if (total_similarity is True) and (previous_distance > radius):
            amount_clusters = 0

        return amount_clusters, cluster_borders
245 
246 
class optics_descriptor:
    """!
    @brief Object description that used by OPTICS algorithm for cluster analysis.

    """

    def __init__(self, index, core_distance = None, reachability_distance = None):
        """!
        @brief Constructor of object description in optics terms.

        @param[in] index (uint): Index of the object in the data set.
        @param[in] core_distance (double): Core distance that is minimum distance to specified number of neighbors.
        @param[in] reachability_distance (double): Reachability distance to this object.

        """

        # Index of object from the input data.
        self.index_object = index

        # Core distance - the smallest distance to reach specified number of neighbors ('None' when undefined).
        self.core_distance = core_distance

        # Reachability distance - the smallest distance to be reachable by core object ('None' when undefined).
        self.reachability_distance = reachability_distance

        # True if object has been already traversed.
        self.processed = False

    def __repr__(self):
        """!
        @brief Returns string representation of the optics descriptor.

        """

        return '(%s, [c: %s, r: %s])' % (self.index_object, self.core_distance, self.reachability_distance)
282 
283 
class optics:
    """!
    @brief Class represents clustering algorithm OPTICS (Ordering Points To Identify Clustering Structure) with KD-tree optimization (ccore options is supported).
    @details OPTICS is a density-based algorithm. Besides explicit clusters the algorithm creates clustering-ordering representation of the input data.
              Clustering-ordering information contains information about internal structures of data set in terms of density and proper connectivity radius can be obtained
              for allocation required amount of clusters using this diagram. In case of usage additional input parameter 'amount of clusters' connectivity radius should be
              bigger than real - because it will be calculated by the algorithms if requested amount of clusters is not allocated.

    CCORE option can be used to use the pyclustering core - C/C++ shared library for processing that significantly increases performance.

    @image html optics_example_clustering.png "Scheme how does OPTICS works. At the beginning only one cluster is allocated, but two is requested. At the second step OPTICS calculates connectivity radius using cluster-ordering and performs final cluster allocation."

    Example:
    @code
        # Read sample for clustering from some file
        sample = read_sample(path_sample)

        # Create OPTICS algorithm for cluster analysis
        optics_instance = optics(sample, 0.5, 6)

        # Run cluster analysis
        optics_instance.process()

        # Obtain results of clustering
        clusters = optics_instance.get_clusters()
        noise = optics_instance.get_noise()

        # Obtain reachability-distances
        ordering = ordering_analyser(optics_instance.get_ordering())

        # Visualization of cluster ordering in line with reachability distance.
        ordering_visualizer.show_ordering_diagram(ordering)
    @endcode

    Amount of clusters that should be allocated can be also specified. In this case connectivity radius should be greater than real, for example:
    @code
        # Import required packages
        from pyclustering.cluster.optics import optics
        from pyclustering.samples.definitions import FCPS_SAMPLES
        from pyclustering.utils import read_sample

        # Read sample for clustering from some file
        sample = read_sample(FCPS_SAMPLES.SAMPLE_LSUN)

        # Run cluster analysis where connectivity radius is bigger than real
        radius = 2.0
        neighbors = 3
        amount_of_clusters = 3

        optics_instance = optics(sample, radius, neighbors, amount_of_clusters)
        optics_instance.process()

        # Obtain results of clustering
        clusters = optics_instance.get_clusters()
        noise = optics_instance.get_noise()
    @endcode

    """

    def __init__(self, sample, eps, minpts, amount_clusters = None, ccore = True, **kwargs):
        """!
        @brief Constructor of clustering algorithm OPTICS.

        @param[in] sample (list): Input data that is presented as a list of points (objects), where each point is represented by list or tuple.
        @param[in] eps (double): Connectivity radius between points, points may be connected if distance between them less than the radius.
        @param[in] minpts (uint): Minimum number of shared neighbors that is required for establishing links between points.
        @param[in] amount_clusters (uint): Optional parameter where amount of clusters that should be allocated is specified.
                    In case of usage 'amount_clusters' connectivity radius can be greater than real, in other words, there is place for mistake
                    in connectivity radius usage.
        @param[in] ccore (bool): if True than DLL CCORE (C++ solution) will be used for solving the problem.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'data_type').

        <b>Keyword Args:</b><br>
            - data_type (string): Data type of input sample 'data' that is processed by the algorithm ('points', 'distance_matrix').

        @exception TypeError if 'data_type' is neither 'points' nor 'distance_matrix'.

        """

        self.__sample_pointer = sample          # Algorithm parameter - pointer to sample for processing.
        self.__eps = eps                        # Algorithm parameter - connectivity radius between object for establish links between object.
        self.__minpts = minpts                  # Algorithm parameter - minimum number of neighbors that is required for establish links between object.
        self.__amount_clusters = amount_clusters

        self.__ordering = None
        self.__clusters = None
        self.__noise = None
        self.__optics_objects = None

        self.__data_type = kwargs.get('data_type', 'points')

        self.__kdtree = None
        self.__ccore = ccore

        # Neighbor search routine is chosen once in line with the data type (used by the python implementation).
        self.__neighbor_searcher = self.__create_neighbor_searcher(self.__data_type)

        if self.__ccore:
            # CCORE is used only if the library reports itself as workable.
            self.__ccore = ccore_library.workable()


    def process(self):
        """!
        @brief Performs cluster analysis in line with rules of OPTICS algorithm.

        @remark Results of clustering can be obtained using corresponding gets methods.

        @see get_clusters()
        @see get_noise()
        @see get_ordering()

        """

        if self.__ccore is True:
            self.__process_by_ccore()

        else:
            self.__process_by_python()


    def __process_by_ccore(self):
        """!
        @brief Performs cluster analysis using CCORE (C/C++ part of pyclustering library).
        @details Clusters, noise, ordering and radius are taken from the wrapper result; OPTICS objects are rebuilt
                  from the returned indexes and distances, where negative distance is converted to 'None'.

        """

        (self.__clusters, self.__noise, self.__ordering, self.__eps,
         objects_indexes, objects_core_distances, objects_reachability_distances) = \
            wrapper.optics(self.__sample_pointer, self.__eps, self.__minpts, self.__amount_clusters, self.__data_type)

        self.__optics_objects = []
        for index_object, core_distance, reachability_distance in \
                zip(objects_indexes, objects_core_distances, objects_reachability_distances):

            # negative value is treated as an undefined distance
            if core_distance < 0.0:
                core_distance = None

            if reachability_distance < 0.0:
                reachability_distance = None

            optics_object = optics_descriptor(index_object, core_distance, reachability_distance)
            optics_object.processed = True

            self.__optics_objects.append(optics_object)


    def __process_by_python(self):
        """!
        @brief Performs cluster analysis using python code.
        @details If requested amount of clusters is not obtained by the first pass, connectivity radius is
                  recalculated using cluster-ordering and allocation is repeated with the new radius.

        """

        if self.__data_type == 'points':
            self.__kdtree = kdtree(self.__sample_pointer, range(len(self.__sample_pointer)))

        self.__allocate_clusters()

        if (self.__amount_clusters is not None) and (self.__amount_clusters != len(self.get_clusters())):
            analyser = ordering_analyser(self.get_ordering())

            # Ordering is empty when no cluster has been allocated - radius cannot be estimated in that case.
            if len(analyser) > 0:
                radius, _ = analyser.calculate_connvectivity_radius(self.__amount_clusters)
                if radius is not None:
                    # NOTE(review): the ordering cached by get_ordering() is not rebuilt after this second
                    # allocation - confirm whether that is intended.
                    self.__eps = radius
                    self.__allocate_clusters()


    def __initialize(self, sample):
        """!
        @brief Initializes internal states and resets clustering results in line with input sample.

        @param[in] sample (list): Input data whose length defines amount of OPTICS objects.

        """

        self.__processed = [False] * len(sample)
        self.__optics_objects = [optics_descriptor(i) for i in range(len(sample))]  # List of OPTICS objects that corresponds to objects from input sample.
        self.__ordered_database = []    # List of OPTICS objects in traverse order.

        self.__clusters = None          # Result of clustering (list of clusters where each cluster contains indexes of objects from input data).
        self.__noise = None             # Result of clustering (noise).


    def __allocate_clusters(self):
        """!
        @brief Performs cluster allocation and builds ordering diagram that is based on reachability-distances.

        """

        self.__initialize(self.__sample_pointer)

        for optic_object in self.__optics_objects:
            if optic_object.processed is False:
                self.__expand_cluster_order(optic_object)

        self.__extract_clusters()


    def get_clusters(self):
        """!
        @brief Returns list of allocated clusters, where each cluster contains indexes of objects and each cluster is represented by list.

        @return (list) List of allocated clusters ('None' until results are produced).

        @see process()
        @see get_noise()
        @see get_ordering()
        @see get_radius()

        """

        return self.__clusters


    def get_noise(self):
        """!
        @brief Returns list of noise that contains indexes of objects that corresponds to input data.

        @return (list) List of allocated noise objects ('None' until results are produced).

        @see process()
        @see get_clusters()
        @see get_ordering()
        @see get_radius()

        """

        return self.__noise


    def get_ordering(self):
        """!
        @brief Returns clustering ordering information about the input data set.
        @details Clustering ordering of data-set contains the information about the internal clustering structure in line with connectivity radius.
                  The ordering is built on the first call from objects of allocated clusters whose reachability
                  distance is defined, and the same list is returned by subsequent calls.

        @return (list) Reachability-distances of clustering ordering (can be passed to 'ordering_analyser').

        @see process()
        @see get_clusters()
        @see get_noise()
        @see get_radius()
        @see get_optics_objects()

        """

        if self.__ordering is None:
            self.__ordering = []

            for cluster in self.__clusters:
                for index_object in cluster:
                    optics_object = self.__optics_objects[index_object]
                    if optics_object.reachability_distance is not None:
                        self.__ordering.append(optics_object.reachability_distance)

        return self.__ordering


    def get_optics_objects(self):
        """!
        @brief Returns OPTICS objects where each object contains information about index of point from processed data,
                core distance and reachability distance.

        @return (list) OPTICS objects.

        @see get_ordering()
        @see get_clusters()
        @see get_noise()
        @see optics_descriptor

        """

        return self.__optics_objects


    def get_radius(self):
        """!
        @brief Returns connectivity radius that is calculated and used for clustering by the algorithm.
        @details Connectivity radius may be changed only in case of usage additional parameter of the algorithm - amount of clusters for allocation.

        @return (double) Connectivity radius.

        @see get_ordering()
        @see get_clusters()
        @see get_noise()
        @see get_optics_objects()

        """

        return self.__eps


    def get_cluster_encoding(self):
        """!
        @brief Returns clustering result representation type that indicate how clusters are encoded.

        @return (type_encoding) Clustering result representation.

        @see get_clusters()

        """

        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION


    def __create_neighbor_searcher(self, data_type):
        """!
        @brief Returns neighbor searcher in line with data type.

        @param[in] data_type (string): Data type (points or distance matrix).

        @return (callable) Method that returns neighbors of an OPTICS object as list of [index, distance].

        @exception TypeError if data type is unknown.

        """
        if data_type == 'points':
            return self.__neighbor_indexes_points
        elif data_type == 'distance_matrix':
            return self.__neighbor_indexes_distance_matrix
        else:
            raise TypeError("Unknown type of data is specified '%s'" % data_type)


    def __expand_cluster_order(self, optics_object):
        """!
        @brief Expand cluster order from not processed optic-object that corresponds to object from input data.
                Traverse procedure is performed until objects are reachable from core-objects in line with connectivity radius.
                Order database is updated during expanding.

        @param[in] optics_object (optics_descriptor): Object that hasn't been processed.

        """

        optics_object.processed = True

        neighbors_descriptor = self.__neighbor_searcher(optics_object)
        optics_object.reachability_distance = None

        self.__ordered_database.append(optics_object)

        # Check core distance: object without enough neighbors is not a core-object and cannot be expanded.
        if len(neighbors_descriptor) < self.__minpts:
            optics_object.core_distance = None
            return

        neighbors_descriptor.sort(key = lambda obj: obj[1])
        optics_object.core_distance = neighbors_descriptor[self.__minpts - 1][1]

        # Continue processing
        order_seed = []
        self.__update_order_seed(optics_object, neighbors_descriptor, order_seed)

        while len(order_seed) > 0:
            # seeds are kept sorted by reachability distance - take the closest one
            optic_descriptor = order_seed.pop(0)

            neighbors_descriptor = self.__neighbor_searcher(optic_descriptor)
            optic_descriptor.processed = True

            self.__ordered_database.append(optic_descriptor)

            if len(neighbors_descriptor) >= self.__minpts:
                neighbors_descriptor.sort(key = lambda obj: obj[1])
                optic_descriptor.core_distance = neighbors_descriptor[self.__minpts - 1][1]

                self.__update_order_seed(optic_descriptor, neighbors_descriptor, order_seed)
            else:
                optic_descriptor.core_distance = None


    def __extract_clusters(self):
        """!
        @brief Extract clusters and noise from order database.
        @details Object whose reachability distance is undefined or greater than connectivity radius starts a new
                  cluster if it is a core-object for the radius, otherwise it is noise. Any other object is appended
                  to the current cluster (to the noise if no cluster has been started yet).

        """

        self.__clusters = []
        self.__noise = []

        current_cluster = self.__noise
        for optics_object in self.__ordered_database:
            if (optics_object.reachability_distance is None) or (optics_object.reachability_distance > self.__eps):
                if (optics_object.core_distance is not None) and (optics_object.core_distance <= self.__eps):
                    self.__clusters.append([ optics_object.index_object ])
                    current_cluster = self.__clusters[-1]
                else:
                    self.__noise.append(optics_object.index_object)
            else:
                current_cluster.append(optics_object.index_object)


    def __update_order_seed(self, optic_descriptor, neighbors_descriptors, order_seed):
        """!
        @brief Update sorted list of reachable objects (from core-object) that should be processed using neighbors of core-object.

        @param[in] optic_descriptor (optics_descriptor): Core-object whose neighbors should be analysed.
        @param[in] neighbors_descriptors (list): List of neighbors of core-object.
        @param[in|out] order_seed (list): List of sorted object in line with reachable distance.

        """

        for neighbor_descriptor in neighbors_descriptors:
            index_neighbor = neighbor_descriptor[0]
            current_reachable_distance = neighbor_descriptor[1]

            neighbor = self.__optics_objects[index_neighbor]
            if neighbor.processed is True:
                continue

            reachable_distance = max(current_reachable_distance, optic_descriptor.core_distance)
            if neighbor.reachability_distance is None:
                neighbor.reachability_distance = reachable_distance

                # insert element in queue O(n) - worst case.
                index_insertion = len(order_seed)
                for index_seed in range(0, len(order_seed)):
                    if reachable_distance < order_seed[index_seed].reachability_distance:
                        index_insertion = index_seed
                        break

                order_seed.insert(index_insertion, neighbor)

            elif reachable_distance < neighbor.reachability_distance:
                # shorter way to the neighbor is found - its position in the queue may change
                neighbor.reachability_distance = reachable_distance
                order_seed.sort(key = lambda obj: obj.reachability_distance)


    def __neighbor_indexes_points(self, optic_object):
        """!
        @brief Return neighbors of the specified object in case of sequence of points.

        @param[in] optic_object (optics_descriptor): Object for which neighbors should be returned in line with connectivity radius.

        @return (list) List of neighbors like [index, distance] in line the connectivity radius (object itself is excluded).

        """
        kdnodes = self.__kdtree.find_nearest_dist_nodes(self.__sample_pointer[optic_object.index_object], self.__eps)

        # NOTE(review): square root is taken from the first element of each tuple - presumably KD-tree returns
        # squared distance; confirm against kdtree implementation.
        return [[node_tuple[1].payload, math.sqrt(node_tuple[0])] for node_tuple in kdnodes if
                node_tuple[1].payload != optic_object.index_object]


    def __neighbor_indexes_distance_matrix(self, optic_object):
        """!
        @brief Return neighbors of the specified object in case of distance matrix.

        @param[in] optic_object (optics_descriptor): Object for which neighbors should be returned in line with connectivity radius.

        @return (list) List of neighbors like [index, distance] in line the connectivity radius (object itself is excluded).

        """
        distances = self.__sample_pointer[optic_object.index_object]
        return [[index_neighbor, distances[index_neighbor]] for index_neighbor in range(len(distances))
                if ((distances[index_neighbor] <= self.__eps) and (index_neighbor != optic_object.index_object))]
Object description that used by OPTICS algorithm for cluster analysis.
Definition: optics.py:247
def process(self)
Performs cluster analysis in line with rules of OPTICS algorithm.
Definition: optics.py:381
def __neighbor_indexes_points(self, optic_object)
Return neighbors of the specified object in case of sequence of points.
Definition: optics.py:695
def __len__(self)
Returns length of clustering-ordering diagram.
Definition: optics.py:135
def show_ordering_diagram(analyser, amount_clusters=None)
Display cluster-ordering (reachability-plot) diagram.
Definition: optics.py:58
def __update_order_seed(self, optic_descriptor, neighbors_descriptors, order_seed)
Update sorted list of reachable objects (from core-object) that should be processed using neighbors o...
Definition: optics.py:661
def get_ordering(self)
Returns clustering ordering information about the input data set.
Definition: optics.py:504
Class represents clustering algorithm OPTICS (Ordering Points To Identify Clustering Structure) with ...
Definition: optics.py:284
def get_optics_objects(self)
Returns OPTICS objects where each object contains information about index of point from processed dat...
Definition: optics.py:531
def __repr__(self)
Returns string representation of the optics descriptor.
Definition: optics.py:275
def cluster_ordering(self)
(list) Returns values of dataset cluster ordering.
Definition: optics.py:117
reachability_distance
Reachability distance - the smallest distance to be reachable by core object.
Definition: optics.py:270
Module for representing clustering results.
Definition: encoder.py:1
Represents KD Tree that is a space-partitioning data structure for organizing points in a k-dimension...
Definition: kdtree.py:157
def __create_neighbor_searcher(self, data_type)
Returns neighbor searcher in line with data type.
Definition: optics.py:578
Colors used by pyclustering library for visualization.
Definition: color.py:1
index_object
Index of object from the input data.
Definition: optics.py:264
processed
True is object has been already traversed.
Definition: optics.py:273
core_distance
Core distance - the smallest distance to reach specified number of neighbors that is not greater then...
Definition: optics.py:267
def __init__(self, index, core_distance=None, reachability_distance=None)
Constructor of object description in optics terms.
Definition: optics.py:253
def __init__(self, sample, eps, minpts, amount_clusters=None, ccore=True, kwargs)
Constructor of clustering algorithm OPTICS.
Definition: optics.py:342
def get_clusters(self)
Returns list of allocated clusters, where each cluster contains indexes of objects and each cluster i...
Definition: optics.py:472
def __extract_clusters(self)
Extract clusters and noise from order database.
Definition: optics.py:640
Analyser of cluster ordering diagram.
Definition: optics.py:105
def __initialize(self, sample)
Initializes internal states and resets clustering results in line with input sample.
Definition: optics.py:443
def __allocate_clusters(self)
Performs cluster allocation and builds ordering diagram that is based on reachability-distances.
Definition: optics.py:457
def extract_cluster_amount(self, radius)
Obtains amount of clustering that can be allocated by using specified radius for ordering diagram and...
Definition: optics.py:185
def get_noise(self)
Returns list of noise that contains indexes of objects that corresponds to input data.
Definition: optics.py:488
def calculate_connvectivity_radius(self, amount_clusters, maximum_iterations=100)
Calculates connectivity radius of allocation specified amount of clusters using ordering diagram and ...
Definition: optics.py:143
def __process_by_ccore(self)
Performs cluster analysis using CCORE (C/C++ part of pyclustering library).
Definition: optics.py:400
def __neighbor_indexes_distance_matrix(self, optic_object)
Return neighbors of the specified object in case of distance matrix.
Definition: optics.py:709
def get_cluster_encoding(self)
Returns clustering result representation type that indicate how clusters are encoded.
Definition: optics.py:565
def __process_by_python(self)
Performs cluster analysis using python code.
Definition: optics.py:424
Cluster ordering diagram visualizer that represents dataset graphically as density-based clustering s...
Definition: optics.py:48
def __init__(self, ordering_diagram)
Analyser of ordering diagram that is based on reachability-distances.
Definition: optics.py:125
def get_radius(self)
Returns connectivity radius that is calculated and used for clustering by the algorithm.
Definition: optics.py:548
Data Structure: KD-Tree.
Definition: kdtree.py:1
def __expand_cluster_order(self, optics_object)
Expand cluster order from not processed optic-object that corresponds to object from input data...
Definition: optics.py:593