pyclustering  0.10.1
pyclustering is a Python, C++ data mining library.
optics.py
1 """!
2 
3 @brief Cluster analysis algorithm: OPTICS (Ordering Points To Identify Clustering Structure)
4 @details Implementation based on paper @cite article::optics::1.
5 
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2020
8 @copyright BSD-3-Clause
9 
10 """
11 
12 
13 import math
14 import matplotlib.pyplot as plt
15 
16 from pyclustering.container.kdtree import kdtree_balanced
17 
18 from pyclustering.cluster.encoder import type_encoding
19 
20 from pyclustering.utils.color import color as color_list
21 
22 from pyclustering.core.wrapper import ccore_library
23 
24 import pyclustering.core.optics_wrapper as wrapper
25 
26 
class ordering_visualizer:
    """!
    @brief Cluster ordering diagram visualizer that represents dataset graphically as density-based clustering structure.
    @details This OPTICS algorithm is KD-tree optimized.

    @see ordering_analyser

    """

    @staticmethod
    def show_ordering_diagram(analyser, amount_clusters = None):
        """!
        @brief Display cluster-ordering (reachability-plot) diagram.
        @details When 'amount_clusters' is specified, the connectivity radius is requested from the analyser. If the
                  analyser cannot find such a radius (it returns 'None'), the plain diagram is displayed without the
                  radius line and without cluster colors.

        @param[in] analyser (ordering_analyser): cluster-ordering analyser whose ordering diagram should be displayed.
        @param[in] amount_clusters (uint): if it is not 'None' then it displays connectivity radius line that can be used
                    for allocation of specified amount of clusters and colorizes diagram by corresponding cluster colors.

        Example demonstrates general abilities of 'ordering_visualizer' class:
        @code
            # Display cluster-ordering diagram with connectivity radius that is used for allocation of three clusters.
            ordering_visualizer.show_ordering_diagram(analyser, 3)

            # Display cluster-ordering diagram without radius.
            ordering_visualizer.show_ordering_diagram(analyser)
        @endcode

        """
        ordering = analyser.cluster_ordering
        axis = plt.subplot(111)

        radius, borders = None, []
        if amount_clusters is not None:
            radius, borders = analyser.calculate_connvectivity_radius(amount_clusters)

        if radius is not None:
            palette = color_list.TITLES

            # Divide into cluster groups to visualize by colors. The palette index is wrapped so that a large
            # amount of clusters does not run out of the palette.
            left_index_border = 0
            current_index_border = 0
            for index_border, right_index_border in enumerate(borders):
                axis.bar(range(left_index_border, right_index_border),
                         ordering[left_index_border:right_index_border],
                         width = 1.0, color = palette[index_border % len(palette)])
                left_index_border = right_index_border
                current_index_border = index_border

            # The tail of the diagram (after the last border) is the last cluster.
            axis.bar(range(left_index_border, len(ordering)), ordering[left_index_border:len(ordering)],
                     width = 1.0, color = palette[(current_index_border + 1) % len(palette)])

            plt.xlim([0, len(ordering)])

            plt.axhline(y = radius, linewidth = 2, color = 'black')
            plt.text(0, radius + radius * 0.03,
                     " Radius: " + str(round(radius, 4)) + ";\n Clusters: " + str(amount_clusters),
                     color = 'b', fontsize = 10)

        else:
            # Either no amount of clusters was requested or the radius was not found: draw the plain diagram.
            axis.bar(range(0, len(ordering)), ordering[0:len(ordering)], width = 1.0, color = 'black')
            plt.xlim([0, len(ordering)])

        plt.show()
82 
83 
class ordering_analyser:
    """!
    @brief Analyser of cluster ordering diagram.
    @details Using cluster-ordering it is able to calculate connectivity radius for allocation of specified amount of
              clusters and to calculate amount of clusters using specified connectivity radius. Cluster-ordering is
              formed by OPTICS algorithm during cluster analysis.

    @see optics

    """

    @property
    def cluster_ordering(self):
        """!
        @brief (list) Returns values of dataset cluster ordering.

        """
        return self.__ordering


    def __init__(self, ordering_diagram):
        """!
        @brief Analyser of ordering diagram that is based on reachability-distances.

        @param[in] ordering_diagram (list): reachability-distances in cluster-ordering.

        @see calculate_connvectivity_radius

        """
        self.__ordering = ordering_diagram


    def __len__(self):
        """!
        @brief Returns length of clustering-ordering diagram.

        """
        return len(self.__ordering)


    def calculate_connvectivity_radius(self, amount_clusters, maximum_iterations = 100):
        """!
        @brief Calculates connectivity radius of allocation specified amount of clusters using ordering diagram and marks borders of clusters using indexes of values of ordering diagram.
        @details The radius is searched by bisection between 0 and the maximum value of the ordering diagram.
                  Parameter 'maximum_iterations' is used to protect from hanging when it is impossible to allocate
                  specified number of clusters.

        @param[in] amount_clusters (uint): amount of clusters that should be allocated by calculated connectivity radius.
        @param[in] maximum_iterations (uint): maximum number of iteration for searching connectivity radius to allocated specified amount of clusters (by default it is restricted by 100 iterations).

        @return (double, list) Value of connectivity radius and borders of clusters like (radius, borders). If the
                 connectivity radius hasn't been found for the specified amount of iterations (or the ordering
                 diagram is empty) then (None, []) is returned.

        """

        # Nothing can be allocated on an empty diagram ('max' would raise on it).
        if len(self.__ordering) == 0:
            return None, []

        upper_distance = max(self.__ordering)
        lower_distance = 0.0

        # If even the biggest radius produces more clusters than requested, the search is pointless.
        amount, _ = self.extract_cluster_amount(upper_distance)
        if amount > amount_clusters:
            return None, []

        for _ in range(maximum_iterations):
            radius = (lower_distance + upper_distance) / 2.0

            amount, borders = self.extract_cluster_amount(radius)
            if amount == amount_clusters:
                return radius, borders

            if amount == 0:
                break

            if amount > amount_clusters:
                lower_distance = radius
            else:
                upper_distance = radius

        # Borders of an unsuccessful attempt are meaningless without its radius, so they are not reported.
        return None, []


    def extract_cluster_amount(self, radius):
        """!
        @brief Obtains amount of clustering that can be allocated by using specified radius for ordering diagram and borders between them.
        @details When growth of reachability-distances is detected then it is considered as a start point of cluster,
                  then peak is detected and after that recession is observed until new growth (that means end of the
                  current cluster and start of a new one) or end of diagram.

        @param[in] radius (double): connectivity radius that is used for cluster allocation.

        @return (uint, list) Amount of clusters that can be allocated by the connectivity radius on ordering diagram and borders between them using indexes
                 from ordering diagram (amount_clusters, border_clusters). For an empty ordering diagram (0, []) is returned.

        """

        # An empty diagram contains no objects, therefore no clusters (also avoids comparing 'None' with radius below).
        if len(self.__ordering) == 0:
            return 0, []

        amount_clusters = 1

        cluster_start = False
        cluster_pick = False
        total_similarity = True
        previous_cluster_distance = None
        previous_distance = None

        cluster_borders = []

        for index_ordering, distance in enumerate(self.__ordering):
            if distance >= radius:
                if cluster_start is False:
                    # Growth over the radius: a new cluster starts here.
                    cluster_start = True
                    amount_clusters += 1

                    if index_ordering != 0:
                        cluster_borders.append(index_ordering)

                else:
                    if (distance < previous_cluster_distance) and (cluster_pick is False):
                        # Recession after the peak.
                        cluster_pick = True

                    elif (distance > previous_cluster_distance) and (cluster_pick is True):
                        # New growth after recession: end of the current cluster and start of a new one.
                        cluster_pick = False
                        amount_clusters += 1

                        if index_ordering != 0:
                            cluster_borders.append(index_ordering)

                previous_cluster_distance = distance

            else:
                cluster_start = False
                cluster_pick = False

            if (previous_distance is not None) and (distance != previous_distance):
                total_similarity = False

            previous_distance = distance

        # All values are equal and greater than the radius: nothing can be allocated.
        if (total_similarity is True) and (previous_distance > radius):
            amount_clusters = 0

        return amount_clusters, cluster_borders
224 
225 
class optics_descriptor:
    """!
    @brief Object description that used by OPTICS algorithm for cluster analysis.

    """

    def __init__(self, index, core_distance = None, reachability_distance = None):
        """!
        @brief Constructor of object description in optics terms.

        @param[in] index (uint): Index of the object in the data set.
        @param[in] core_distance (double): Core distance that is minimum distance to specified number of neighbors.
        @param[in] reachability_distance (double): Reachability distance to this object.

        """

        # Index of the object in the data set.
        self.index_object = index

        # Core distance of the object ('None' when it is not defined).
        self.core_distance = core_distance

        # Reachability distance to the object ('None' when it is not defined).
        self.reachability_distance = reachability_distance

        # Processing flag; a freshly created descriptor is always marked as not processed.
        self.processed = False

    def __repr__(self):
        """!
        @brief Returns string representation of the optics descriptor.

        """

        return '(%s, [c: %s, r: %s])' % (self.index_object, self.core_distance, self.reachability_distance)
261 
262 
class optics:
    """!
    @brief Class represents clustering algorithm OPTICS (Ordering Points To Identify Clustering Structure) with KD-tree optimization (ccore options is supported).
    @details OPTICS is a density-based algorithm. The primary purpose of the algorithm is not to provide explicit clusters, but to create
              a clustering-ordering representation of the input data. Clustering-ordering information contains information about internal
              structures of data set in terms of density and proper connectivity radius can be obtained for allocation of required amount
              of clusters using this diagram. In case of usage of additional input parameter 'amount of clusters' connectivity radius should
              be bigger than real - because it will be calculated by the algorithm if requested amount of clusters is not allocated.

    @image html optics_example_clustering.png "Scheme how does OPTICS works. At the beginning only one cluster is allocated, but two is requested. At the second step OPTICS calculates connectivity radius using cluster-ordering and performs final cluster allocation."

    Clustering example using sample 'Chainlink':
    @code
        from pyclustering.cluster import cluster_visualizer
        from pyclustering.cluster.optics import optics, ordering_analyser, ordering_visualizer
        from pyclustering.samples.definitions import FCPS_SAMPLES
        from pyclustering.utils import read_sample

        # Read sample for clustering from some file.
        sample = read_sample(FCPS_SAMPLES.SAMPLE_CHAINLINK)

        # Run cluster analysis where connectivity radius is bigger than real.
        radius = 0.5
        neighbors = 3
        optics_instance = optics(sample, radius, neighbors)

        # Performs cluster analysis.
        optics_instance.process()

        # Obtain results of clustering.
        clusters = optics_instance.get_clusters()
        noise = optics_instance.get_noise()
        ordering = optics_instance.get_ordering()

        # Visualize clustering results.
        visualizer = cluster_visualizer()
        visualizer.append_clusters(clusters, sample)
        visualizer.show()

        # Display ordering.
        analyser = ordering_analyser(ordering)
        ordering_visualizer.show_ordering_diagram(analyser, 2)
    @endcode

    Amount of clusters that should be allocated can be also specified. In this case connectivity radius should be greater than real, for example:
    @code
        from pyclustering.cluster import cluster_visualizer
        from pyclustering.cluster.optics import optics, ordering_analyser, ordering_visualizer
        from pyclustering.samples.definitions import FCPS_SAMPLES
        from pyclustering.utils import read_sample

        # Read sample for clustering from some file
        sample = read_sample(FCPS_SAMPLES.SAMPLE_LSUN)

        # Run cluster analysis where connectivity radius is bigger than real
        radius = 2.0
        neighbors = 3
        amount_of_clusters = 3
        optics_instance = optics(sample, radius, neighbors, amount_of_clusters)

        # Performs cluster analysis
        optics_instance.process()

        # Obtain results of clustering
        clusters = optics_instance.get_clusters()
        noise = optics_instance.get_noise()
        ordering = optics_instance.get_ordering()

        # Visualize ordering diagram
        analyser = ordering_analyser(ordering)
        ordering_visualizer.show_ordering_diagram(analyser, amount_of_clusters)

        # Visualize clustering results
        visualizer = cluster_visualizer()
        visualizer.append_clusters(clusters, sample)
        visualizer.show()
    @endcode

    Here is an example where OPTICS extracts outliers from sample 'Tetra':
    @code
        from pyclustering.cluster import cluster_visualizer
        from pyclustering.cluster.optics import optics
        from pyclustering.samples.definitions import FCPS_SAMPLES
        from pyclustering.utils import read_sample

        # Read sample for clustering from some file.
        sample = read_sample(FCPS_SAMPLES.SAMPLE_TETRA)

        # Run cluster analysis where connectivity radius is bigger than real.
        radius = 0.4
        neighbors = 3
        optics_instance = optics(sample, radius, neighbors)

        # Performs cluster analysis.
        optics_instance.process()

        # Obtain results of clustering.
        clusters = optics_instance.get_clusters()
        noise = optics_instance.get_noise()

        # Visualize clustering results (clusters and outliers).
        visualizer = cluster_visualizer()
        visualizer.append_clusters(clusters, sample)
        visualizer.append_cluster(noise, sample, marker='x')
        visualizer.show()
    @endcode

    Visualization result of allocated clusters and outliers is presented on the image below:
    @image html optics_noise_tetra.png "Clusters and outliers extracted by OPTICS algorithm from sample 'Tetra'."

    """

    def __init__(self, sample, eps, minpts, amount_clusters=None, ccore=True, **kwargs):
        """!
        @brief Constructor of clustering algorithm OPTICS.

        @param[in] sample (list): Input data that is presented as a list of points (objects), where each point is represented by list or tuple.
        @param[in] eps (double): Connectivity radius between points, points may be connected if distance between them less than the radius.
        @param[in] minpts (uint): Minimum number of shared neighbors that is required for establishing links between points.
        @param[in] amount_clusters (uint): Optional parameter where amount of clusters that should be allocated is specified.
                    In case of usage 'amount_clusters' connectivity radius can be greater than real, in other words, there is place for mistake
                    in connectivity radius usage.
        @param[in] ccore (bool): if True than DLL CCORE (C++ solution) will be used for solving the problem.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'data_type').

        <b>Keyword Args:</b><br>
            - data_type (string): Data type of input sample 'data' that is processed by the algorithm ('points', 'distance_matrix').

        @exception TypeError is raised when unknown 'data_type' is specified.
        @exception ValueError is raised when input arguments are incorrect.

        """

        self.__sample_pointer = sample      # Algorithm parameter - pointer to sample for processing.
        self.__eps = eps                    # Algorithm parameter - connectivity radius between object for establish links between object.
        self.__minpts = minpts              # Algorithm parameter - minimum number of neighbors that is required for establish links between object.
        self.__amount_clusters = amount_clusters

        self.__ordering = None
        self.__clusters = None
        self.__noise = None
        self.__optics_objects = None

        self.__data_type = kwargs.get('data_type', 'points')

        self.__kdtree = None
        self.__ccore = ccore

        self.__neighbor_searcher = self.__create_neighbor_searcher(self.__data_type)

        # CCORE is used only when it is requested and the library is actually workable.
        if self.__ccore:
            self.__ccore = ccore_library.workable()

        self.__verify_arguments()


    def process(self):
        """!
        @brief Performs cluster analysis in line with rules of OPTICS algorithm.

        @return (optics) Returns itself (OPTICS instance).

        @see get_clusters()
        @see get_noise()
        @see get_ordering()

        """

        if self.__ccore is True:
            self.__process_by_ccore()

        else:
            self.__process_by_python()

        return self


    def __process_by_ccore(self):
        """!
        @brief Performs cluster analysis using CCORE (C/C++ part of pyclustering library).

        """

        (self.__clusters, self.__noise, self.__ordering, self.__eps,
         objects_indexes, objects_core_distances, objects_reachability_distances) = \
            wrapper.optics(self.__sample_pointer, self.__eps, self.__minpts, self.__amount_clusters, self.__data_type)

        self.__optics_objects = []
        for i in range(len(objects_indexes)):
            # Negative distances are converted to 'None' (distance is not defined).
            if objects_core_distances[i] < 0.0:
                objects_core_distances[i] = None

            if objects_reachability_distances[i] < 0.0:
                objects_reachability_distances[i] = None

            optics_object = optics_descriptor(objects_indexes[i], objects_core_distances[i], objects_reachability_distances[i])
            optics_object.processed = True

            self.__optics_objects.append(optics_object)


    def __process_by_python(self):
        """!
        @brief Performs cluster analysis using python code.

        """

        if self.__data_type == 'points':
            self.__kdtree = kdtree_balanced(self.__sample_pointer, range(len(self.__sample_pointer)))

        self.__allocate_clusters()

        # If requested amount of clusters is not allocated then connectivity radius is recalculated
        # using cluster-ordering and allocation is repeated with the new radius.
        if (self.__amount_clusters is not None) and (self.__amount_clusters != len(self.get_clusters())):
            analyser = ordering_analyser(self.get_ordering())
            radius, _ = analyser.calculate_connvectivity_radius(self.__amount_clusters)
            if radius is not None:
                self.__eps = radius
                self.__allocate_clusters()


    def __initialize(self, sample):
        """!
        @brief Initializes internal states and resets clustering results in line with input sample.

        @param[in] sample (list): Input data whose length defines amount of OPTICS objects.

        """

        self.__processed = [False] * len(sample)
        self.__optics_objects = [optics_descriptor(i) for i in range(len(sample))]      # List of OPTICS objects that corresponds to objects from input sample.
        self.__ordered_database = []        # List of OPTICS objects in traverse order.

        self.__clusters = None      # Result of clustering (list of clusters where each cluster contains indexes of objects from input data).
        self.__noise = None         # Result of clustering (noise).


    def __allocate_clusters(self):
        """!
        @brief Performs cluster allocation and builds ordering diagram that is based on reachability-distances.

        """

        self.__initialize(self.__sample_pointer)

        for optic_object in self.__optics_objects:
            if optic_object.processed is False:
                self.__expand_cluster_order(optic_object)

        self.__extract_clusters()


    def get_clusters(self):
        """!
        @brief Returns list of allocated clusters, where each cluster contains indexes of objects and each cluster is represented by list.

        @return (list) List of allocated clusters ('None' if cluster analysis has not been performed).

        @see process()
        @see get_noise()
        @see get_ordering()
        @see get_radius()

        """

        return self.__clusters


    def get_noise(self):
        """!
        @brief Returns list of noise that contains indexes of objects that corresponds to input data.

        @return (list) List of allocated noise objects ('None' if cluster analysis has not been performed).

        @see process()
        @see get_clusters()
        @see get_ordering()
        @see get_radius()

        """

        return self.__noise


    def get_ordering(self):
        """!
        @brief Returns clustering ordering information about the input data set.
        @details Clustering ordering of data-set contains the information about the internal clustering structure in line with connectivity radius.
                  The ordering is built once from allocated clusters and then the same list is returned by next calls.

        @return (list) Reachability-distances of clustered objects (objects without reachability-distance are skipped).
                 Empty list is returned if cluster analysis has not been performed yet.

        @see process()
        @see get_clusters()
        @see get_noise()
        @see get_radius()
        @see get_optics_objects()

        """

        if self.__ordering is None:
            # Nothing to build the ordering from yet; do not cache so that a later call after 'process()' works.
            if self.__clusters is None:
                return []

            # Build into a local list so that the cached value is assigned only when it is complete.
            ordering = []
            for cluster in self.__clusters:
                for index_object in cluster:
                    optics_object = self.__optics_objects[index_object]
                    if optics_object.reachability_distance is not None:
                        ordering.append(optics_object.reachability_distance)

            self.__ordering = ordering

        return self.__ordering


    def get_optics_objects(self):
        """!
        @brief Returns OPTICS objects where each object contains information about index of point from processed data,
                core distance and reachability distance.

        @return (list) OPTICS objects ('None' if cluster analysis has not been performed).

        @see get_ordering()
        @see get_clusters()
        @see get_noise()
        @see optics_descriptor

        """

        return self.__optics_objects


    def get_radius(self):
        """!
        @brief Returns connectivity radius that is calculated and used for clustering by the algorithm.
        @details Connectivity radius may be changed only in case of usage additional parameter of the algorithm - amount of clusters for allocation.

        @return (double) Connectivity radius.

        @see get_ordering()
        @see get_clusters()
        @see get_noise()
        @see get_optics_objects()

        """

        return self.__eps


    def get_cluster_encoding(self):
        """!
        @brief Returns clustering result representation type that indicate how clusters are encoded.

        @return (type_encoding) Clustering result representation.

        @see get_clusters()

        """

        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION


    def __create_neighbor_searcher(self, data_type):
        """!
        @brief Returns neighbor searcher in line with data type.

        @param[in] data_type (string): Data type (points or distance matrix).

        @return (callable) Method that returns neighbors of an OPTICS object.

        @exception TypeError is raised when data type is unknown.

        """
        if data_type == 'points':
            return self.__neighbor_indexes_points
        elif data_type == 'distance_matrix':
            return self.__neighbor_indexes_distance_matrix
        else:
            raise TypeError("Unknown type of data is specified '%s'" % data_type)


    def __expand_cluster_order(self, optics_object):
        """!
        @brief Expand cluster order from not processed optic-object that corresponds to object from input data.
                Traverse procedure is performed until objects are reachable from core-objects in line with connectivity radius.
                Order database is updated during expanding.

        @param[in] optics_object (optics_descriptor): Object that hasn't been processed.

        """

        optics_object.processed = True

        neighbors_descriptor = self.__neighbor_searcher(optics_object)
        optics_object.reachability_distance = None

        self.__ordered_database.append(optics_object)

        # Check core distance
        if len(neighbors_descriptor) >= self.__minpts:
            neighbors_descriptor.sort(key = lambda obj: obj[1])
            optics_object.core_distance = neighbors_descriptor[self.__minpts - 1][1]

            # Continue processing
            order_seed = list()
            self.__update_order_seed(optics_object, neighbors_descriptor, order_seed)

            while len(order_seed) > 0:
                # The seed list is kept sorted, so the first element has the smallest reachability-distance.
                optic_descriptor = order_seed.pop(0)

                neighbors_descriptor = self.__neighbor_searcher(optic_descriptor)
                optic_descriptor.processed = True

                self.__ordered_database.append(optic_descriptor)

                if len(neighbors_descriptor) >= self.__minpts:
                    neighbors_descriptor.sort(key = lambda obj: obj[1])
                    optic_descriptor.core_distance = neighbors_descriptor[self.__minpts - 1][1]

                    self.__update_order_seed(optic_descriptor, neighbors_descriptor, order_seed)
                else:
                    optic_descriptor.core_distance = None

        else:
            optics_object.core_distance = None


    def __extract_clusters(self):
        """!
        @brief Extract clusters and noise from order database.

        """

        self.__clusters = []
        self.__noise = []

        current_cluster = self.__noise
        for optics_object in self.__ordered_database:
            if (optics_object.reachability_distance is None) or (optics_object.reachability_distance > self.__eps):
                if (optics_object.core_distance is not None) and (optics_object.core_distance <= self.__eps):
                    # Not reachable core-object starts a new cluster.
                    self.__clusters.append([ optics_object.index_object ])
                    current_cluster = self.__clusters[-1]
                else:
                    self.__noise.append(optics_object.index_object)
            else:
                current_cluster.append(optics_object.index_object)


    def __update_order_seed(self, optic_descriptor, neighbors_descriptors, order_seed):
        """!
        @brief Update sorted list of reachable objects (from core-object) that should be processed using neighbors of core-object.

        @param[in] optic_descriptor (optics_descriptor): Core-object whose neighbors should be analysed.
        @param[in] neighbors_descriptors (list): List of neighbors of core-object.
        @param[in|out] order_seed (list): List of sorted object in line with reachable distance.

        """

        for neighbor_descriptor in neighbors_descriptors:
            index_neighbor = neighbor_descriptor[0]
            current_reachable_distance = neighbor_descriptor[1]

            neighbor = self.__optics_objects[index_neighbor]
            if neighbor.processed is not True:
                reachable_distance = max(current_reachable_distance, optic_descriptor.core_distance)
                if neighbor.reachability_distance is None:
                    neighbor.reachability_distance = reachable_distance

                    # insert element in queue O(n) - worst case.
                    index_insertion = len(order_seed)
                    for index_seed in range(0, len(order_seed)):
                        if reachable_distance < order_seed[index_seed].reachability_distance:
                            index_insertion = index_seed
                            break

                    order_seed.insert(index_insertion, neighbor)

                else:
                    if reachable_distance < neighbor.reachability_distance:
                        # Shorter path is found: update distance and restore order of the queue.
                        neighbor.reachability_distance = reachable_distance
                        order_seed.sort(key=lambda obj: obj.reachability_distance)


    def __neighbor_indexes_points(self, optic_object):
        """!
        @brief Return neighbors of the specified object in case of sequence of points.

        @param[in] optic_object (optics_descriptor): Object for which neighbors should be returned in line with connectivity radius.

        @return (list) List of pairs [index of neighbor, distance to neighbor] in line the connectivity radius.

        """
        kdnodes = self.__kdtree.find_nearest_dist_nodes(self.__sample_pointer[optic_object.index_object], self.__eps)
        return [[node_tuple[1].payload, math.sqrt(node_tuple[0])] for node_tuple in kdnodes if
                node_tuple[1].payload != optic_object.index_object]


    def __neighbor_indexes_distance_matrix(self, optic_object):
        """!
        @brief Return neighbors of the specified object in case of distance matrix.

        @param[in] optic_object (optics_descriptor): Object for which neighbors should be returned in line with connectivity radius.

        @return (list) List of pairs [index of neighbor, distance to neighbor] in line the connectivity radius.

        """
        distances = self.__sample_pointer[optic_object.index_object]
        return [[index_neighbor, distances[index_neighbor]] for index_neighbor in range(len(distances))
                if ((distances[index_neighbor] <= self.__eps) and (index_neighbor != optic_object.index_object))]


    def __verify_arguments(self):
        """!
        @brief Verify input parameters for the algorithm and throw exception in case of incorrectness.

        @exception ValueError is raised when input data is empty or when connectivity radius, minimum number of
                    neighbors or amount of clusters is out of range.

        """
        if len(self.__sample_pointer) == 0:
            raise ValueError("Input data is empty (size: '%d')." % len(self.__sample_pointer))

        # Values are reported using '%s': '%d' truncates fractional values (for example, -0.5 is displayed as 0).
        if self.__eps < 0:
            raise ValueError("Connectivity radius (current value: '%s') should be greater or equal to 0." % self.__eps)

        # NOTE(review): the message says "greater than 0" while the check accepts 0 - confirm intended lower bound.
        if self.__minpts < 0:
            raise ValueError("Minimum number of neighbors (current value: '%s') should be greater than 0." %
                             self.__minpts)

        if (self.__amount_clusters is not None) and (self.__amount_clusters <= 0):
            raise ValueError("Amount of clusters (current value: '%s') should be greater than 0." %
                             self.__amount_clusters)
pyclustering.cluster.optics.optics.__amount_clusters
__amount_clusters
Definition: optics.py:395
pyclustering.cluster.optics.optics.__clusters
__clusters
Definition: optics.py:398
pyclustering.container.kdtree
Data Structure: KD-Tree.
Definition: kdtree.py:1
pyclustering.cluster.optics.optics.__init__
def __init__(self, sample, eps, minpts, amount_clusters=None, ccore=True, **kwargs)
Constructor of clustering algorithm OPTICS.
Definition: optics.py:374
pyclustering.cluster.optics.optics.get_noise
def get_noise(self)
Returns list of noise that contains indexes of objects that corresponds to input data.
Definition: optics.py:524
pyclustering.cluster.optics.ordering_analyser.calculate_connvectivity_radius
def calculate_connvectivity_radius(self, amount_clusters, maximum_iterations=100)
Calculates connectivity radius of allocation specified amount of clusters using ordering diagram and ...
Definition: optics.py:122
pyclustering.cluster.optics.ordering_analyser.__ordering
__ordering
Definition: optics.py:111
pyclustering.cluster.optics.ordering_visualizer.show_ordering_diagram
def show_ordering_diagram(analyser, amount_clusters=None)
Display cluster-ordering (reachability-plot) diagram.
Definition: optics.py:37
pyclustering.cluster.optics.optics_descriptor.__repr__
def __repr__(self)
Returns string representation of the optics descriptor.
Definition: optics.py:254
pyclustering.container.kdtree.kdtree_balanced
Represents balanced static KD-tree that does not provide services to add and remove nodes after initi...
Definition: kdtree.py:243
pyclustering.cluster.optics.optics.__kdtree
__kdtree
Definition: optics.py:404
pyclustering.cluster.optics.optics
Class represents clustering algorithm OPTICS (Ordering Points To Identify Clustering Structure) with ...
Definition: optics.py:263
pyclustering.cluster.optics.optics.__update_order_seed
def __update_order_seed(self, optic_descriptor, neighbors_descriptors, order_seed)
Update sorted list of reachable objects (from core-object) that should be processed using neighbors o...
Definition: optics.py:697
pyclustering.cluster.optics.optics.__expand_cluster_order
def __expand_cluster_order(self, optics_object)
Expand cluster order from not processed optic-object that corresponds to object from input data.
Definition: optics.py:629
pyclustering.cluster.optics.optics.__ordered_database
__ordered_database
Definition: optics.py:487
pyclustering.cluster.optics.optics_descriptor.__init__
def __init__(self, index, core_distance=None, reachability_distance=None)
Constructor of object description in optics terms.
Definition: optics.py:232
pyclustering.cluster.optics.optics.__ccore
__ccore
Definition: optics.py:405
pyclustering.cluster.optics.optics.__neighbor_searcher
__neighbor_searcher
Definition: optics.py:407
pyclustering.cluster.optics.optics.get_clusters
def get_clusters(self)
Returns list of allocated clusters, where each cluster contains indexes of objects and each cluster i...
Definition: optics.py:508
pyclustering.cluster.optics.optics.__create_neighbor_searcher
def __create_neighbor_searcher(self, data_type)
Returns neighbor searcher in line with data type.
Definition: optics.py:614
pyclustering.cluster.optics.optics.get_cluster_encoding
def get_cluster_encoding(self)
Returns clustering result representation type that indicate how clusters are encoded.
Definition: optics.py:601
pyclustering.cluster.optics.ordering_analyser.extract_cluster_amount
def extract_cluster_amount(self, radius)
Obtains amount of clustering that can be allocated by using specified radius for ordering diagram and...
Definition: optics.py:164
pyclustering.cluster.optics.optics.__extract_clusters
def __extract_clusters(self)
Extract clusters and noise from order database.
Definition: optics.py:676
pyclustering.cluster.optics.optics.__neighbor_indexes_distance_matrix
def __neighbor_indexes_distance_matrix(self, optic_object)
Return neighbors of the specified object in case of distance matrix.
Definition: optics.py:745
pyclustering.cluster.optics.optics.__allocate_clusters
def __allocate_clusters(self)
Performs cluster allocation and builds ordering diagram that is based on reachability-distances.
Definition: optics.py:493
pyclustering.cluster.optics.optics.__sample_pointer
__sample_pointer
Definition: optics.py:392
pyclustering.cluster.optics.optics.__neighbor_indexes_points
def __neighbor_indexes_points(self, optic_object)
Return neighbors of the specified object in case of sequence of points.
Definition: optics.py:731
pyclustering.cluster.optics.optics.process
def process(self)
Performs cluster analysis in line with rules of OPTICS algorithm.
Definition: optics.py:415
pyclustering.cluster.optics.optics.get_radius
def get_radius(self)
Returns connectivity radius that is calculated and used for clustering by the algorithm.
Definition: optics.py:584
pyclustering.cluster.optics.optics.__minpts
__minpts
Definition: optics.py:394
pyclustering.cluster.optics.optics.__initialize
def __initialize(self, sample)
Initializes internal states and resets clustering results in line with input sample.
Definition: optics.py:479
pyclustering.cluster.optics.optics.get_ordering
def get_ordering(self)
Returns clustering ordering information about the input data set.
Definition: optics.py:540
pyclustering.cluster.optics.optics_descriptor.core_distance
core_distance
Core distance - the smallest distance to reach specified number of neighbors that is not greater than...
Definition: optics.py:246
pyclustering.cluster.optics.ordering_analyser.__init__
def __init__(self, ordering_diagram)
Analyser of ordering diagram that is based on reachability-distances.
Definition: optics.py:104
pyclustering.cluster.optics.optics.__process_by_python
def __process_by_python(self)
Performs cluster analysis using python code.
Definition: optics.py:460
pyclustering.cluster.optics.optics.__eps
__eps
Definition: optics.py:393
pyclustering.cluster.optics.optics.__optics_objects
__optics_objects
Definition: optics.py:400
pyclustering.cluster.optics.optics.__verify_arguments
def __verify_arguments(self)
Verify input parameters for the algorithm and throw exception in case of incorrectness.
Definition: optics.py:759
pyclustering.cluster.optics.ordering_visualizer
Cluster ordering diagram visualizer that represents dataset graphically as density-based clustering s...
Definition: optics.py:27
pyclustering.cluster.optics.optics.get_optics_objects
def get_optics_objects(self)
Returns OPTICS objects where each object contains information about index of point from processed dat...
Definition: optics.py:567
pyclustering.cluster.optics.optics.__noise
__noise
Definition: optics.py:399
pyclustering.cluster.optics.ordering_analyser.cluster_ordering
def cluster_ordering(self)
(list) Returns values of dataset cluster ordering.
Definition: optics.py:96
pyclustering.cluster.optics.optics_descriptor.processed
processed
True if object has already been traversed.
Definition: optics.py:252
pyclustering.cluster.optics.ordering_analyser
Analyser of cluster ordering diagram.
Definition: optics.py:84
pyclustering.cluster.optics.optics.__data_type
__data_type
Definition: optics.py:402
pyclustering.utils.color
Colors used by pyclustering library for visualization.
Definition: color.py:1
pyclustering.cluster.optics.optics.__process_by_ccore
def __process_by_ccore(self)
Performs cluster analysis using CCORE (C/C++ part of pyclustering library).
Definition: optics.py:436
pyclustering.cluster.optics.optics_descriptor.index_object
index_object
Index of object from the input data.
Definition: optics.py:243
pyclustering.cluster.encoder
Module for representing clustering results.
Definition: encoder.py:1
pyclustering.cluster.optics.optics_descriptor.reachability_distance
reachability_distance
Reachability distance - the smallest distance to be reachable by core object.
Definition: optics.py:249
pyclustering.cluster.optics.ordering_analyser.__len__
def __len__(self)
Returns length of clustering-ordering diagram.
Definition: optics.py:114
pyclustering.cluster.optics.optics.__ordering
__ordering
Definition: optics.py:397
pyclustering.cluster.optics.optics_descriptor
Object description that is used by OPTICS algorithm for cluster analysis.
Definition: optics.py:226
pyclustering.cluster.optics.optics.__processed
__processed
Definition: optics.py:485