optics.py
1 """!
2 
3 @brief Cluster analysis algorithm: OPTICS (Ordering Points To Identify Clustering Structure)
4 @details Implementation based on paper @cite article::optics::1.
5 
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2019
8 @copyright GNU Public License
9 
10 @cond GNU_PUBLIC_LICENSE
11  PyClustering is free software: you can redistribute it and/or modify
12  it under the terms of the GNU General Public License as published by
13  the Free Software Foundation, either version 3 of the License, or
14  (at your option) any later version.
15 
16  PyClustering is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  GNU General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23 @endcond
24 
25 """
26 
27 
28 import math
29 import warnings
30 
31 try:
32  import matplotlib.pyplot as plt
33 except Exception as error_instance:
34  warnings.warn("Impossible to import matplotlib (please, install 'matplotlib'), pyclustering's visualization "
35  "functionality is not available (details: '%s')." % str(error_instance))
36 
37 from pyclustering.container.kdtree import kdtree
38 
39 from pyclustering.cluster.encoder import type_encoding
40 
41 from pyclustering.utils.color import color as color_list
42 
43 from pyclustering.core.wrapper import ccore_library
44 
45 import pyclustering.core.optics_wrapper as wrapper
46 
47 
49  """!
50  @brief Cluster ordering diagram visualizer that represents dataset graphically as density-based clustering structure.
51  @details This OPTICS algorithm is KD-tree optimized.
52 
53  @see ordering_analyser
54 
55  """
56 
57  @staticmethod
58  def show_ordering_diagram(analyser, amount_clusters = None):
59  """!
60  @brief Display cluster-ordering (reachability-plot) diagram.
61 
62  @param[in] analyser (ordering_analyser): cluster-ordering analyser whose ordering diagram should be displayed.
63  @param[in] amount_clusters (uint): if it is not 'None' then it displays connectivity radius line that can used for allocation of specified amount of clusters
64  and colorize diagram by corresponding cluster colors.
65 
66  Example demonstrates general abilities of 'ordering_visualizer' class:
67  @code
68  # Display cluster-ordering diagram with connectivity radius is used for allocation of three clusters.
69  ordering_visualizer.show_ordering_diagram(analyser, 3);
70 
71  # Display cluster-ordering diagram without radius.
72  ordering_visualizer.show_ordering_diagram(analyser);
73  @endcode
74 
75  """
76  ordering = analyser.cluster_ordering
77  axis = plt.subplot(111)
78 
79  if amount_clusters is not None:
80  radius, borders = analyser.calculate_connvectivity_radius(amount_clusters)
81 
82  # divide into cluster groups to visualize by colors
83  left_index_border = 0
84  current_index_border = 0
85  for index_border in range(len(borders)):
86  right_index_border = borders[index_border]
87  axis.bar(range(left_index_border, right_index_border), ordering[left_index_border:right_index_border], width = 1.0, color = color_list.TITLES[index_border])
88  left_index_border = right_index_border
89  current_index_border = index_border
90 
91  axis.bar(range(left_index_border, len(ordering)), ordering[left_index_border:len(ordering)], width = 1.0, color = color_list.TITLES[current_index_border + 1])
92 
93  plt.xlim([0, len(ordering)])
94 
95  plt.axhline(y = radius, linewidth = 2, color = 'black')
96  plt.text(0, radius + radius * 0.03, " Radius: " + str(round(radius, 4)) + ";\n Clusters: " + str(amount_clusters), color = 'b', fontsize = 10)
97 
98  else:
99  axis.bar(range(0, len(ordering)), ordering[0:len(ordering)], width = 1.0, color = 'black')
100  plt.xlim([0, len(ordering)])
101 
102  plt.show()
103 
104 
106  """!
107  @brief Analyser of cluster ordering diagram.
108  @details Using cluster-ordering it is able to connectivity radius for allocation of specified amount of clusters and
109  calculate amount of clusters using specified connectivity radius. Cluster-ordering is formed by OPTICS algorithm
110  during cluster analysis.
111 
112  @see optics
113 
114  """
115 
116  @property
117  def cluster_ordering(self):
118  """!
119  @brief (list) Returns values of dataset cluster ordering.
120 
121  """
122  return self.__ordering
123 
124 
125  def __init__(self, ordering_diagram):
126  """!
127  @brief Analyser of ordering diagram that is based on reachability-distances.
128 
129  @see calculate_connvectivity_radius
130 
131  """
132  self.__ordering = ordering_diagram
133 
134 
135  def __len__(self):
136  """!
137  @brief Returns length of clustering-ordering diagram.
138 
139  """
140  return len(self.__ordering)
141 
142 
143  def calculate_connvectivity_radius(self, amount_clusters, maximum_iterations = 100):
144  """!
145  @brief Calculates connectivity radius of allocation specified amount of clusters using ordering diagram and marks borders of clusters using indexes of values of ordering diagram.
146  @details Parameter 'maximum_iterations' is used to protect from hanging when it is impossible to allocate specified number of clusters.
147 
148  @param[in] amount_clusters (uint): amount of clusters that should be allocated by calculated connectivity radius.
149  @param[in] maximum_iterations (uint): maximum number of iteration for searching connectivity radius to allocated specified amount of clusters (by default it is restricted by 100 iterations).
150 
151  @return (double, list) Value of connectivity radius and borders of clusters like (radius, borders), radius may be 'None' as well as borders may be '[]'
152  if connectivity radius hasn't been found for the specified amount of iterations.
153 
154  """
155 
156  maximum_distance = max(self.__ordering)
157 
158  upper_distance = maximum_distance
159  lower_distance = 0.0
160 
161  result = None
162 
163  amount, borders = self.extract_cluster_amount(maximum_distance)
164  if amount <= amount_clusters:
165  for _ in range(maximum_iterations):
166  radius = (lower_distance + upper_distance) / 2.0
167 
168  amount, borders = self.extract_cluster_amount(radius)
169  if amount == amount_clusters:
170  result = radius
171  break
172 
173  elif amount == 0:
174  break
175 
176  elif amount > amount_clusters:
177  lower_distance = radius
178 
179  elif amount < amount_clusters:
180  upper_distance = radius
181 
182  return result, borders
183 
184 
185  def extract_cluster_amount(self, radius):
186  """!
187  @brief Obtains amount of clustering that can be allocated by using specified radius for ordering diagram and borders between them.
188  @details When growth of reachability-distances is detected than it is considered as a start point of cluster,
189  than pick is detected and after that recession is observed until new growth (that means end of the
190  current cluster and start of a new one) or end of diagram.
191 
192  @param[in] radius (double): connectivity radius that is used for cluster allocation.
193 
194  @return (unit, list) Amount of clusters that can be allocated by the connectivity radius on ordering diagram and borders between them using indexes
195  from ordering diagram (amount_clusters, border_clusters).
196 
197  """
198 
199  amount_clusters = 1
200 
201  cluster_start = False
202  cluster_pick = False
203  total_similarity = True
204  previous_cluster_distance = None
205  previous_distance = None
206 
207  cluster_borders = []
208 
209  for index_ordering in range(len(self.__ordering)):
210  distance = self.__ordering[index_ordering]
211  if distance >= radius:
212  if cluster_start is False:
213  cluster_start = True
214  amount_clusters += 1
215 
216  if index_ordering != 0:
217  cluster_borders.append(index_ordering)
218 
219  else:
220  if (distance < previous_cluster_distance) and (cluster_pick is False):
221  cluster_pick = True
222 
223  elif (distance > previous_cluster_distance) and (cluster_pick is True):
224  cluster_pick = False
225  amount_clusters += 1
226 
227  if index_ordering != 0:
228  cluster_borders.append(index_ordering)
229 
230  previous_cluster_distance = distance
231 
232  else:
233  cluster_start = False
234  cluster_pick = False
235 
236  if (previous_distance is not None) and (distance != previous_distance):
237  total_similarity = False
238 
239  previous_distance = distance
240 
241  if (total_similarity is True) and (previous_distance > radius):
242  amount_clusters = 0
243 
244  return amount_clusters, cluster_borders
245 
246 
248  """!
249  @brief Object description that used by OPTICS algorithm for cluster analysis.
250 
251  """
252 
253  def __init__(self, index, core_distance = None, reachability_distance = None):
254  """!
255  @brief Constructor of object description in optics terms.
256 
257  @param[in] index (uint): Index of the object in the data set.
258  @param[in] core_distance (double): Core distance that is minimum distance to specified number of neighbors.
259  @param[in] reachability_distance (double): Reachability distance to this object.
260 
261  """
262 
263 
264  self.index_object = index
265 
266 
267  self.core_distance = core_distance
268 
269 
270  self.reachability_distance = reachability_distance
271 
272 
273  self.processed = False
274 
275  def __repr__(self):
276  """!
277  @brief Returns string representation of the optics descriptor.
278 
279  """
280 
281  return '(%s, [c: %s, r: %s])' % (self.index_object, self.core_distance, self.reachability_distance)
282 
283 
284 class optics:
285  """!
286  @brief Class represents clustering algorithm OPTICS (Ordering Points To Identify Clustering Structure) with KD-tree optimization (ccore options is supported).
287  @details OPTICS is a density-based algorithm. Purpose of the algorithm is to provide explicit clusters, but create clustering-ordering representation of the input data.
288  Clustering-ordering information contains information about internal structures of data set in terms of density and proper connectivity radius can be obtained
289  for allocation required amount of clusters using this diagram. In case of usage additional input parameter 'amount of clusters' connectivity radius should be
290  bigger than real - because it will be calculated by the algorithms if requested amount of clusters is not allocated.
291 
292  @image html optics_example_clustering.png "Scheme how does OPTICS works. At the beginning only one cluster is allocated, but two is requested. At the second step OPTICS calculates connectivity radius using cluster-ordering and performs final cluster allocation."
293 
294  Clustering example using sample 'Chainlink':
295  @code
296  from pyclustering.cluster import cluster_visualizer
297  from pyclustering.cluster.optics import optics, ordering_analyser, ordering_visualizer
298  from pyclustering.samples.definitions import FCPS_SAMPLES
299  from pyclustering.utils import read_sample
300 
301  # Read sample for clustering from some file.
302  sample = read_sample(FCPS_SAMPLES.SAMPLE_CHAINLINK)
303 
304  # Run cluster analysis where connectivity radius is bigger than real.
305  radius = 0.5
306  neighbors = 3
307  optics_instance = optics(sample, radius, neighbors)
308 
309  # Performs cluster analysis.
310  optics_instance.process()
311 
312  # Obtain results of clustering.
313  clusters = optics_instance.get_clusters()
314  noise = optics_instance.get_noise()
315  ordering = optics_instance.get_ordering()
316 
317  # Visualize clustering results.
318  visualizer = cluster_visualizer()
319  visualizer.append_clusters(clusters, sample)
320  visualizer.show()
321 
322  # Display ordering.
323  analyser = ordering_analyser(ordering)
324  ordering_visualizer.show_ordering_diagram(analyser, 2)
325  @endcode
326 
327  Amount of clusters that should be allocated can be also specified. In this case connectivity radius should be greater than real, for example:
328  @code
329  from pyclustering.cluster import cluster_visualizer
330  from pyclustering.cluster.optics import optics, ordering_analyser, ordering_visualizer
331  from pyclustering.samples.definitions import FCPS_SAMPLES
332  from pyclustering.utils import read_sample
333 
334  # Read sample for clustering from some file
335  sample = read_sample(FCPS_SAMPLES.SAMPLE_LSUN)
336 
337  # Run cluster analysis where connectivity radius is bigger than real
338  radius = 2.0
339  neighbors = 3
340  amount_of_clusters = 3
341  optics_instance = optics(sample, radius, neighbors, amount_of_clusters)
342 
343  # Performs cluster analysis
344  optics_instance.process()
345 
346  # Obtain results of clustering
347  clusters = optics_instance.get_clusters()
348  noise = optics_instance.get_noise()
349  ordering = optics_instance.get_ordering()
350 
351  # Visualize ordering diagram
352  analyser = ordering_analyser(ordering)
353  ordering_visualizer.show_ordering_diagram(analyser, amount_of_clusters)
354 
355  # Visualize clustering results
356  visualizer = cluster_visualizer()
357  visualizer.append_clusters(clusters, sample)
358  visualizer.show()
359  @endcode
360 
361  Here is an example where OPTICS extracts outliers from sample 'Tetra':
362  @code
363  from pyclustering.cluster import cluster_visualizer
364  from pyclustering.cluster.optics import optics
365  from pyclustering.samples.definitions import FCPS_SAMPLES
366  from pyclustering.utils import read_sample
367 
368  # Read sample for clustering from some file.
369  sample = read_sample(FCPS_SAMPLES.SAMPLE_TETRA)
370 
371  # Run cluster analysis where connectivity radius is bigger than real.
372  radius = 0.4
373  neighbors = 3
374  optics_instance = optics(sample, radius, neighbors)
375 
376  # Performs cluster analysis.
377  optics_instance.process()
378 
379  # Obtain results of clustering.
380  clusters = optics_instance.get_clusters()
381  noise = optics_instance.get_noise()
382 
383  # Visualize clustering results (clusters and outliers).
384  visualizer = cluster_visualizer()
385  visualizer.append_clusters(clusters, sample)
386  visualizer.append_cluster(noise, sample, marker='x')
387  visualizer.show()
388  @endcode
389 
390  Visualization result of allocated clusters and outliers is presented on the image below:
391  @image html optics_noise_tetra.png "Clusters and outliers extracted by OPTICS algorithm from sample 'Tetra'."
392 
393  """
394 
395  def __init__(self, sample, eps, minpts, amount_clusters = None, ccore = True, **kwargs):
396  """!
397  @brief Constructor of clustering algorithm OPTICS.
398 
399  @param[in] sample (list): Input data that is presented as a list of points (objects), where each point is represented by list or tuple.
400  @param[in] eps (double): Connectivity radius between points, points may be connected if distance between them less than the radius.
401  @param[in] minpts (uint): Minimum number of shared neighbors that is required for establishing links between points.
402  @param[in] amount_clusters (uint): Optional parameter where amount of clusters that should be allocated is specified.
403  In case of usage 'amount_clusters' connectivity radius can be greater than real, in other words, there is place for mistake
404  in connectivity radius usage.
405  @param[in] ccore (bool): if True than DLL CCORE (C++ solution) will be used for solving the problem.
406  @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'data_type').
407 
408  <b>Keyword Args:</b><br>
409  - data_type (string): Data type of input sample 'data' that is processed by the algorithm ('points', 'distance_matrix').
410 
411  """
412 
413  self.__sample_pointer = sample # Algorithm parameter - pointer to sample for processing.
414  self.__eps = eps # Algorithm parameter - connectivity radius between object for establish links between object.
415  self.__minpts = minpts # Algorithm parameter - minimum number of neighbors that is required for establish links between object.
416  self.__amount_clusters = amount_clusters
417 
418  self.__ordering = None
419  self.__clusters = None
420  self.__noise = None
421  self.__optics_objects = None
422 
423  self.__data_type = kwargs.get('data_type', 'points')
424 
425  self.__kdtree = None
426  self.__ccore = ccore
427 
429 
430  if self.__ccore:
431  self.__ccore = ccore_library.workable()
432 
433 
434  def process(self):
435  """!
436  @brief Performs cluster analysis in line with rules of OPTICS algorithm.
437 
438  @remark Results of clustering can be obtained using corresponding gets methods.
439 
440  @see get_clusters()
441  @see get_noise()
442  @see get_ordering()
443 
444  """
445 
446  if self.__ccore is True:
447  self.__process_by_ccore()
448 
449  else:
450  self.__process_by_python()
451 
452 
453  def __process_by_ccore(self):
454  """!
455  @brief Performs cluster analysis using CCORE (C/C++ part of pyclustering library).
456 
457  """
458 
459  (self.__clusters, self.__noise, self.__ordering, self.__eps,
460  objects_indexes, objects_core_distances, objects_reachability_distances) = \
461  wrapper.optics(self.__sample_pointer, self.__eps, self.__minpts, self.__amount_clusters, self.__data_type)
462 
463  self.__optics_objects = []
464  for i in range(len(objects_indexes)):
465  if objects_core_distances[i] < 0.0:
466  objects_core_distances[i] = None
467 
468  if objects_reachability_distances[i] < 0.0:
469  objects_reachability_distances[i] = None
470 
471  optics_object = optics_descriptor(objects_indexes[i], objects_core_distances[i], objects_reachability_distances[i])
472  optics_object.processed = True
473 
474  self.__optics_objects.append(optics_object)
475 
476 
477  def __process_by_python(self):
478  """!
479  @brief Performs cluster analysis using python code.
480 
481  """
482 
483  if self.__data_type == 'points':
484  self.__kdtree = kdtree(self.__sample_pointer, range(len(self.__sample_pointer)))
485 
486  self.__allocate_clusters()
487 
488  if (self.__amount_clusters is not None) and (self.__amount_clusters != len(self.get_clusters())):
489  analyser = ordering_analyser(self.get_ordering())
490  radius, _ = analyser.calculate_connvectivity_radius(self.__amount_clusters)
491  if radius is not None:
492  self.__eps = radius
493  self.__allocate_clusters()
494 
495 
496  def __initialize(self, sample):
497  """!
498  @brief Initializes internal states and resets clustering results in line with input sample.
499 
500  """
501 
502  self.__processed = [False] * len(sample)
503  self.__optics_objects = [optics_descriptor(i) for i in range(len(sample))] # List of OPTICS objects that corresponds to objects from input sample.
504  self.__ordered_database = [] # List of OPTICS objects in traverse order.
505 
506  self.__clusters = None # Result of clustering (list of clusters where each cluster contains indexes of objects from input data).
507  self.__noise = None # Result of clustering (noise).
508 
509 
510  def __allocate_clusters(self):
511  """!
512  @brief Performs cluster allocation and builds ordering diagram that is based on reachability-distances.
513 
514  """
515 
516  self.__initialize(self.__sample_pointer)
517 
518  for optic_object in self.__optics_objects:
519  if optic_object.processed is False:
520  self.__expand_cluster_order(optic_object)
521 
522  self.__extract_clusters()
523 
524 
525  def get_clusters(self):
526  """!
527  @brief Returns list of allocated clusters, where each cluster contains indexes of objects and each cluster is represented by list.
528 
529  @return (list) List of allocated clusters.
530 
531  @see process()
532  @see get_noise()
533  @see get_ordering()
534  @see get_radius()
535 
536  """
537 
538  return self.__clusters
539 
540 
541  def get_noise(self):
542  """!
543  @brief Returns list of noise that contains indexes of objects that corresponds to input data.
544 
545  @return (list) List of allocated noise objects.
546 
547  @see process()
548  @see get_clusters()
549  @see get_ordering()
550  @see get_radius()
551 
552  """
553 
554  return self.__noise
555 
556 
557  def get_ordering(self):
558  """!
559  @brief Returns clustering ordering information about the input data set.
560  @details Clustering ordering of data-set contains the information about the internal clustering structure in line with connectivity radius.
561 
562  @return (ordering_analyser) Analyser of clustering ordering.
563 
564  @see process()
565  @see get_clusters()
566  @see get_noise()
567  @see get_radius()
568  @see get_optics_objects()
569 
570  """
571 
572  if self.__ordering is None:
573  self.__ordering = []
574 
575  for cluster in self.__clusters:
576  for index_object in cluster:
577  optics_object = self.__optics_objects[index_object]
578  if optics_object.reachability_distance is not None:
579  self.__ordering.append(optics_object.reachability_distance)
580 
581  return self.__ordering
582 
583 
585  """!
586  @brief Returns OPTICS objects where each object contains information about index of point from processed data,
587  core distance and reachability distance.
588 
589  @return (list) OPTICS objects.
590 
591  @see get_ordering()
592  @see get_clusters()
593  @see get_noise()
594  @see optics_descriptor
595 
596  """
597 
598  return self.__optics_objects
599 
600 
601  def get_radius(self):
602  """!
603  @brief Returns connectivity radius that is calculated and used for clustering by the algorithm.
604  @details Connectivity radius may be changed only in case of usage additional parameter of the algorithm - amount of clusters for allocation.
605 
606  @return (double) Connectivity radius.
607 
608  @see get_ordering()
609  @see get_clusters()
610  @see get_noise()
611  @see get_optics_objects()
612 
613  """
614 
615  return self.__eps
616 
617 
619  """!
620  @brief Returns clustering result representation type that indicate how clusters are encoded.
621 
622  @return (type_encoding) Clustering result representation.
623 
624  @see get_clusters()
625 
626  """
627 
628  return type_encoding.CLUSTER_INDEX_LIST_SEPARATION
629 
630 
631  def __create_neighbor_searcher(self, data_type):
632  """!
633  @brief Returns neighbor searcher in line with data type.
634 
635  @param[in] data_type (string): Data type (points or distance matrix).
636 
637  """
638  if data_type == 'points':
639  return self.__neighbor_indexes_points
640  elif data_type == 'distance_matrix':
642  else:
643  raise TypeError("Unknown type of data is specified '%s'" % data_type)
644 
645 
646  def __expand_cluster_order(self, optics_object):
647  """!
648  @brief Expand cluster order from not processed optic-object that corresponds to object from input data.
649  Traverse procedure is performed until objects are reachable from core-objects in line with connectivity radius.
650  Order database is updated during expanding.
651 
652  @param[in] optics_object (optics_descriptor): Object that hasn't been processed.
653 
654  """
655 
656  optics_object.processed = True
657 
658  neighbors_descriptor = self.__neighbor_searcher(optics_object)
659  optics_object.reachability_distance = None
660 
661  self.__ordered_database.append(optics_object)
662 
663  # Check core distance
664  if len(neighbors_descriptor) >= self.__minpts:
665  neighbors_descriptor.sort(key = lambda obj: obj[1])
666  optics_object.core_distance = neighbors_descriptor[self.__minpts - 1][1]
667 
668  # Continue processing
669  order_seed = list()
670  self.__update_order_seed(optics_object, neighbors_descriptor, order_seed)
671 
672  while len(order_seed) > 0:
673  optic_descriptor = order_seed[0]
674  order_seed.remove(optic_descriptor)
675 
676  neighbors_descriptor = self.__neighbor_searcher(optic_descriptor)
677  optic_descriptor.processed = True
678 
679  self.__ordered_database.append(optic_descriptor)
680 
681  if len(neighbors_descriptor) >= self.__minpts:
682  neighbors_descriptor.sort(key = lambda obj: obj[1])
683  optic_descriptor.core_distance = neighbors_descriptor[self.__minpts - 1][1]
684 
685  self.__update_order_seed(optic_descriptor, neighbors_descriptor, order_seed)
686  else:
687  optic_descriptor.core_distance = None
688 
689  else:
690  optics_object.core_distance = None
691 
692 
693  def __extract_clusters(self):
694  """!
695  @brief Extract clusters and noise from order database.
696 
697  """
698 
699  self.__clusters = []
700  self.__noise = []
701 
702  current_cluster = self.__noise
703  for optics_object in self.__ordered_database:
704  if (optics_object.reachability_distance is None) or (optics_object.reachability_distance > self.__eps):
705  if (optics_object.core_distance is not None) and (optics_object.core_distance <= self.__eps):
706  self.__clusters.append([ optics_object.index_object ])
707  current_cluster = self.__clusters[-1]
708  else:
709  self.__noise.append(optics_object.index_object)
710  else:
711  current_cluster.append(optics_object.index_object)
712 
713 
714  def __update_order_seed(self, optic_descriptor, neighbors_descriptors, order_seed):
715  """!
716  @brief Update sorted list of reachable objects (from core-object) that should be processed using neighbors of core-object.
717 
718  @param[in] optic_descriptor (optics_descriptor): Core-object whose neighbors should be analysed.
719  @param[in] neighbors_descriptors (list): List of neighbors of core-object.
720  @param[in|out] order_seed (list): List of sorted object in line with reachable distance.
721 
722  """
723 
724  for neighbor_descriptor in neighbors_descriptors:
725  index_neighbor = neighbor_descriptor[0]
726  current_reachable_distance = neighbor_descriptor[1]
727 
728  if self.__optics_objects[index_neighbor].processed is not True:
729  reachable_distance = max(current_reachable_distance, optic_descriptor.core_distance)
730  if self.__optics_objects[index_neighbor].reachability_distance is None:
731  self.__optics_objects[index_neighbor].reachability_distance = reachable_distance
732 
733  # insert element in queue O(n) - worst case.
734  index_insertion = len(order_seed)
735  for index_seed in range(0, len(order_seed)):
736  if reachable_distance < order_seed[index_seed].reachability_distance:
737  index_insertion = index_seed
738  break
739 
740  order_seed.insert(index_insertion, self.__optics_objects[index_neighbor])
741 
742  else:
743  if reachable_distance < self.__optics_objects[index_neighbor].reachability_distance:
744  self.__optics_objects[index_neighbor].reachability_distance = reachable_distance
745  order_seed.sort(key = lambda obj: obj.reachability_distance)
746 
747 
748  def __neighbor_indexes_points(self, optic_object):
749  """!
750  @brief Return neighbors of the specified object in case of sequence of points.
751 
752  @param[in] optic_object (optics_descriptor): Object for which neighbors should be returned in line with connectivity radius.
753 
754  @return (list) List of indexes of neighbors in line the connectivity radius.
755 
756  """
757  kdnodes = self.__kdtree.find_nearest_dist_nodes(self.__sample_pointer[optic_object.index_object], self.__eps)
758  return [[node_tuple[1].payload, math.sqrt(node_tuple[0])] for node_tuple in kdnodes if
759  node_tuple[1].payload != optic_object.index_object]
760 
761 
762  def __neighbor_indexes_distance_matrix(self, optic_object):
763  """!
764  @brief Return neighbors of the specified object in case of distance matrix.
765 
766  @param[in] optic_object (optics_descriptor): Object for which neighbors should be returned in line with connectivity radius.
767 
768  @return (list) List of indexes of neighbors in line the connectivity radius.
769 
770  """
771  distances = self.__sample_pointer[optic_object.index_object]
772  return [[index_neighbor, distances[index_neighbor]] for index_neighbor in range(len(distances))
773  if ((distances[index_neighbor] <= self.__eps) and (index_neighbor != optic_object.index_object))]
Object description that used by OPTICS algorithm for cluster analysis.
Definition: optics.py:247
def process(self)
Performs cluster analysis in line with rules of OPTICS algorithm.
Definition: optics.py:434
def __neighbor_indexes_points(self, optic_object)
Return neighbors of the specified object in case of sequence of points.
Definition: optics.py:748
def __len__(self)
Returns length of clustering-ordering diagram.
Definition: optics.py:135
def show_ordering_diagram(analyser, amount_clusters=None)
Display cluster-ordering (reachability-plot) diagram.
Definition: optics.py:58
def __update_order_seed(self, optic_descriptor, neighbors_descriptors, order_seed)
Update sorted list of reachable objects (from core-object) that should be processed using neighbors o...
Definition: optics.py:714
def get_ordering(self)
Returns clustering ordering information about the input data set.
Definition: optics.py:557
Class represents clustering algorithm OPTICS (Ordering Points To Identify Clustering Structure) with ...
Definition: optics.py:284
def get_optics_objects(self)
Returns OPTICS objects where each object contains information about index of point from processed dat...
Definition: optics.py:584
def __repr__(self)
Returns string representation of the optics descriptor.
Definition: optics.py:275
def cluster_ordering(self)
(list) Returns values of dataset cluster ordering.
Definition: optics.py:117
reachability_distance
Reachability distance - the smallest distance to be reachable by core object.
Definition: optics.py:270
Module for representing clustering results.
Definition: encoder.py:1
Represents KD Tree that is a space-partitioning data structure for organizing points in a k-dimension...
Definition: kdtree.py:157
def __create_neighbor_searcher(self, data_type)
Returns neighbor searcher in line with data type.
Definition: optics.py:631
Colors used by pyclustering library for visualization.
Definition: color.py:1
index_object
Index of object from the input data.
Definition: optics.py:264
processed
True is object has been already traversed.
Definition: optics.py:273
core_distance
Core distance - the smallest distance to reach specified number of neighbors that is not greater then...
Definition: optics.py:267
def __init__(self, index, core_distance=None, reachability_distance=None)
Constructor of object description in optics terms.
Definition: optics.py:253
def __init__(self, sample, eps, minpts, amount_clusters=None, ccore=True, kwargs)
Constructor of clustering algorithm OPTICS.
Definition: optics.py:395
def get_clusters(self)
Returns list of allocated clusters, where each cluster contains indexes of objects and each cluster i...
Definition: optics.py:525
def __extract_clusters(self)
Extract clusters and noise from order database.
Definition: optics.py:693
Analyser of cluster ordering diagram.
Definition: optics.py:105
def __initialize(self, sample)
Initializes internal states and resets clustering results in line with input sample.
Definition: optics.py:496
def __allocate_clusters(self)
Performs cluster allocation and builds ordering diagram that is based on reachability-distances.
Definition: optics.py:510
def extract_cluster_amount(self, radius)
Obtains amount of clustering that can be allocated by using specified radius for ordering diagram and...
Definition: optics.py:185
def get_noise(self)
Returns list of noise that contains indexes of objects that corresponds to input data.
Definition: optics.py:541
def calculate_connvectivity_radius(self, amount_clusters, maximum_iterations=100)
Calculates connectivity radius of allocation specified amount of clusters using ordering diagram and ...
Definition: optics.py:143
def __process_by_ccore(self)
Performs cluster analysis using CCORE (C/C++ part of pyclustering library).
Definition: optics.py:453
def __neighbor_indexes_distance_matrix(self, optic_object)
Return neighbors of the specified object in case of distance matrix.
Definition: optics.py:762
def get_cluster_encoding(self)
Returns clustering result representation type that indicate how clusters are encoded.
Definition: optics.py:618
def __process_by_python(self)
Performs cluster analysis using python code.
Definition: optics.py:477
Cluster ordering diagram visualizer that represents dataset graphically as density-based clustering s...
Definition: optics.py:48
def __init__(self, ordering_diagram)
Analyser of ordering diagram that is based on reachability-distances.
Definition: optics.py:125
def get_radius(self)
Returns connectivity radius that is calculated and used for clustering by the algorithm.
Definition: optics.py:601
Data Structure: KD-Tree.
Definition: kdtree.py:1
def __expand_cluster_order(self, optics_object)
Expand cluster order from not processed optic-object that corresponds to object from input data...
Definition: optics.py:646