optics.py
1 """!
2 
3 @brief Cluster analysis algorithm: OPTICS (Ordering Points To Identify Clustering Structure)
4 @details Implementation based on paper @cite article::optics::1.
5 
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2020
8 @copyright GNU Public License
9 
10 @cond GNU_PUBLIC_LICENSE
11  PyClustering is free software: you can redistribute it and/or modify
12  it under the terms of the GNU General Public License as published by
13  the Free Software Foundation, either version 3 of the License, or
14  (at your option) any later version.
15 
16  PyClustering is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  GNU General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23 @endcond
24 
25 """
26 
27 
28 import math
29 import warnings
30 
31 try:
32  import matplotlib.pyplot as plt
33 except Exception as error_instance:
34  warnings.warn("Impossible to import matplotlib (please, install 'matplotlib'), pyclustering's visualization "
35  "functionality is not available (details: '%s')." % str(error_instance))
36 
37 from pyclustering.container.kdtree import kdtree_balanced
38 
39 from pyclustering.cluster.encoder import type_encoding
40 
41 from pyclustering.utils.color import color as color_list
42 
43 from pyclustering.core.wrapper import ccore_library
44 
45 import pyclustering.core.optics_wrapper as wrapper
46 
47 
49  """!
50  @brief Cluster ordering diagram visualizer that represents dataset graphically as density-based clustering structure.
51  @details This OPTICS algorithm is KD-tree optimized.
52 
53  @see ordering_analyser
54 
55  """
56 
57  @staticmethod
58  def show_ordering_diagram(analyser, amount_clusters = None):
59  """!
60  @brief Display cluster-ordering (reachability-plot) diagram.
61 
62  @param[in] analyser (ordering_analyser): cluster-ordering analyser whose ordering diagram should be displayed.
63  @param[in] amount_clusters (uint): if it is not 'None' then it displays connectivity radius line that can used for allocation of specified amount of clusters
64  and colorize diagram by corresponding cluster colors.
65 
66  Example demonstrates general abilities of 'ordering_visualizer' class:
67  @code
68  # Display cluster-ordering diagram with connectivity radius is used for allocation of three clusters.
69  ordering_visualizer.show_ordering_diagram(analyser, 3);
70 
71  # Display cluster-ordering diagram without radius.
72  ordering_visualizer.show_ordering_diagram(analyser);
73  @endcode
74 
75  """
76  ordering = analyser.cluster_ordering
77  axis = plt.subplot(111)
78 
79  if amount_clusters is not None:
80  radius, borders = analyser.calculate_connvectivity_radius(amount_clusters)
81 
82  # divide into cluster groups to visualize by colors
83  left_index_border = 0
84  current_index_border = 0
85  for index_border in range(len(borders)):
86  right_index_border = borders[index_border]
87  axis.bar(range(left_index_border, right_index_border), ordering[left_index_border:right_index_border], width = 1.0, color = color_list.TITLES[index_border])
88  left_index_border = right_index_border
89  current_index_border = index_border
90 
91  axis.bar(range(left_index_border, len(ordering)), ordering[left_index_border:len(ordering)], width = 1.0, color = color_list.TITLES[current_index_border + 1])
92 
93  plt.xlim([0, len(ordering)])
94 
95  plt.axhline(y = radius, linewidth = 2, color = 'black')
96  plt.text(0, radius + radius * 0.03, " Radius: " + str(round(radius, 4)) + ";\n Clusters: " + str(amount_clusters), color = 'b', fontsize = 10)
97 
98  else:
99  axis.bar(range(0, len(ordering)), ordering[0:len(ordering)], width = 1.0, color = 'black')
100  plt.xlim([0, len(ordering)])
101 
102  plt.show()
103 
104 
106  """!
107  @brief Analyser of cluster ordering diagram.
108  @details Using cluster-ordering it is able to connectivity radius for allocation of specified amount of clusters and
109  calculate amount of clusters using specified connectivity radius. Cluster-ordering is formed by OPTICS algorithm
110  during cluster analysis.
111 
112  @see optics
113 
114  """
115 
116  @property
117  def cluster_ordering(self):
118  """!
119  @brief (list) Returns values of dataset cluster ordering.
120 
121  """
122  return self.__ordering
123 
124 
125  def __init__(self, ordering_diagram):
126  """!
127  @brief Analyser of ordering diagram that is based on reachability-distances.
128 
129  @see calculate_connvectivity_radius
130 
131  """
132  self.__ordering = ordering_diagram
133 
134 
135  def __len__(self):
136  """!
137  @brief Returns length of clustering-ordering diagram.
138 
139  """
140  return len(self.__ordering)
141 
142 
143  def calculate_connvectivity_radius(self, amount_clusters, maximum_iterations = 100):
144  """!
145  @brief Calculates connectivity radius of allocation specified amount of clusters using ordering diagram and marks borders of clusters using indexes of values of ordering diagram.
146  @details Parameter 'maximum_iterations' is used to protect from hanging when it is impossible to allocate specified number of clusters.
147 
148  @param[in] amount_clusters (uint): amount of clusters that should be allocated by calculated connectivity radius.
149  @param[in] maximum_iterations (uint): maximum number of iteration for searching connectivity radius to allocated specified amount of clusters (by default it is restricted by 100 iterations).
150 
151  @return (double, list) Value of connectivity radius and borders of clusters like (radius, borders), radius may be 'None' as well as borders may be '[]'
152  if connectivity radius hasn't been found for the specified amount of iterations.
153 
154  """
155 
156  maximum_distance = max(self.__ordering)
157 
158  upper_distance = maximum_distance
159  lower_distance = 0.0
160 
161  result = None
162 
163  amount, borders = self.extract_cluster_amount(maximum_distance)
164  if amount <= amount_clusters:
165  for _ in range(maximum_iterations):
166  radius = (lower_distance + upper_distance) / 2.0
167 
168  amount, borders = self.extract_cluster_amount(radius)
169  if amount == amount_clusters:
170  result = radius
171  break
172 
173  elif amount == 0:
174  break
175 
176  elif amount > amount_clusters:
177  lower_distance = radius
178 
179  elif amount < amount_clusters:
180  upper_distance = radius
181 
182  return result, borders
183 
184 
185  def extract_cluster_amount(self, radius):
186  """!
187  @brief Obtains amount of clustering that can be allocated by using specified radius for ordering diagram and borders between them.
188  @details When growth of reachability-distances is detected than it is considered as a start point of cluster,
189  than pick is detected and after that recession is observed until new growth (that means end of the
190  current cluster and start of a new one) or end of diagram.
191 
192  @param[in] radius (double): connectivity radius that is used for cluster allocation.
193 
194  @return (unit, list) Amount of clusters that can be allocated by the connectivity radius on ordering diagram and borders between them using indexes
195  from ordering diagram (amount_clusters, border_clusters).
196 
197  """
198 
199  amount_clusters = 1
200 
201  cluster_start = False
202  cluster_pick = False
203  total_similarity = True
204  previous_cluster_distance = None
205  previous_distance = None
206 
207  cluster_borders = []
208 
209  for index_ordering in range(len(self.__ordering)):
210  distance = self.__ordering[index_ordering]
211  if distance >= radius:
212  if cluster_start is False:
213  cluster_start = True
214  amount_clusters += 1
215 
216  if index_ordering != 0:
217  cluster_borders.append(index_ordering)
218 
219  else:
220  if (distance < previous_cluster_distance) and (cluster_pick is False):
221  cluster_pick = True
222 
223  elif (distance > previous_cluster_distance) and (cluster_pick is True):
224  cluster_pick = False
225  amount_clusters += 1
226 
227  if index_ordering != 0:
228  cluster_borders.append(index_ordering)
229 
230  previous_cluster_distance = distance
231 
232  else:
233  cluster_start = False
234  cluster_pick = False
235 
236  if (previous_distance is not None) and (distance != previous_distance):
237  total_similarity = False
238 
239  previous_distance = distance
240 
241  if (total_similarity is True) and (previous_distance > radius):
242  amount_clusters = 0
243 
244  return amount_clusters, cluster_borders
245 
246 
248  """!
249  @brief Object description that used by OPTICS algorithm for cluster analysis.
250 
251  """
252 
253  def __init__(self, index, core_distance = None, reachability_distance = None):
254  """!
255  @brief Constructor of object description in optics terms.
256 
257  @param[in] index (uint): Index of the object in the data set.
258  @param[in] core_distance (double): Core distance that is minimum distance to specified number of neighbors.
259  @param[in] reachability_distance (double): Reachability distance to this object.
260 
261  """
262 
263 
264  self.index_object = index
265 
266 
267  self.core_distance = core_distance
268 
269 
270  self.reachability_distance = reachability_distance
271 
272 
273  self.processed = False
274 
275  def __repr__(self):
276  """!
277  @brief Returns string representation of the optics descriptor.
278 
279  """
280 
281  return '(%s, [c: %s, r: %s])' % (self.index_object, self.core_distance, self.reachability_distance)
282 
283 
284 class optics:
285  """!
286  @brief Class represents clustering algorithm OPTICS (Ordering Points To Identify Clustering Structure) with KD-tree optimization (ccore options is supported).
287  @details OPTICS is a density-based algorithm. Purpose of the algorithm is to provide explicit clusters, but create clustering-ordering representation of the input data.
288  Clustering-ordering information contains information about internal structures of data set in terms of density and proper connectivity radius can be obtained
289  for allocation required amount of clusters using this diagram. In case of usage additional input parameter 'amount of clusters' connectivity radius should be
290  bigger than real - because it will be calculated by the algorithms if requested amount of clusters is not allocated.
291 
292  @image html optics_example_clustering.png "Scheme how does OPTICS works. At the beginning only one cluster is allocated, but two is requested. At the second step OPTICS calculates connectivity radius using cluster-ordering and performs final cluster allocation."
293 
294  Clustering example using sample 'Chainlink':
295  @code
296  from pyclustering.cluster import cluster_visualizer
297  from pyclustering.cluster.optics import optics, ordering_analyser, ordering_visualizer
298  from pyclustering.samples.definitions import FCPS_SAMPLES
299  from pyclustering.utils import read_sample
300 
301  # Read sample for clustering from some file.
302  sample = read_sample(FCPS_SAMPLES.SAMPLE_CHAINLINK)
303 
304  # Run cluster analysis where connectivity radius is bigger than real.
305  radius = 0.5
306  neighbors = 3
307  optics_instance = optics(sample, radius, neighbors)
308 
309  # Performs cluster analysis.
310  optics_instance.process()
311 
312  # Obtain results of clustering.
313  clusters = optics_instance.get_clusters()
314  noise = optics_instance.get_noise()
315  ordering = optics_instance.get_ordering()
316 
317  # Visualize clustering results.
318  visualizer = cluster_visualizer()
319  visualizer.append_clusters(clusters, sample)
320  visualizer.show()
321 
322  # Display ordering.
323  analyser = ordering_analyser(ordering)
324  ordering_visualizer.show_ordering_diagram(analyser, 2)
325  @endcode
326 
327  Amount of clusters that should be allocated can be also specified. In this case connectivity radius should be greater than real, for example:
328  @code
329  from pyclustering.cluster import cluster_visualizer
330  from pyclustering.cluster.optics import optics, ordering_analyser, ordering_visualizer
331  from pyclustering.samples.definitions import FCPS_SAMPLES
332  from pyclustering.utils import read_sample
333 
334  # Read sample for clustering from some file
335  sample = read_sample(FCPS_SAMPLES.SAMPLE_LSUN)
336 
337  # Run cluster analysis where connectivity radius is bigger than real
338  radius = 2.0
339  neighbors = 3
340  amount_of_clusters = 3
341  optics_instance = optics(sample, radius, neighbors, amount_of_clusters)
342 
343  # Performs cluster analysis
344  optics_instance.process()
345 
346  # Obtain results of clustering
347  clusters = optics_instance.get_clusters()
348  noise = optics_instance.get_noise()
349  ordering = optics_instance.get_ordering()
350 
351  # Visualize ordering diagram
352  analyser = ordering_analyser(ordering)
353  ordering_visualizer.show_ordering_diagram(analyser, amount_of_clusters)
354 
355  # Visualize clustering results
356  visualizer = cluster_visualizer()
357  visualizer.append_clusters(clusters, sample)
358  visualizer.show()
359  @endcode
360 
361  Here is an example where OPTICS extracts outliers from sample 'Tetra':
362  @code
363  from pyclustering.cluster import cluster_visualizer
364  from pyclustering.cluster.optics import optics
365  from pyclustering.samples.definitions import FCPS_SAMPLES
366  from pyclustering.utils import read_sample
367 
368  # Read sample for clustering from some file.
369  sample = read_sample(FCPS_SAMPLES.SAMPLE_TETRA)
370 
371  # Run cluster analysis where connectivity radius is bigger than real.
372  radius = 0.4
373  neighbors = 3
374  optics_instance = optics(sample, radius, neighbors)
375 
376  # Performs cluster analysis.
377  optics_instance.process()
378 
379  # Obtain results of clustering.
380  clusters = optics_instance.get_clusters()
381  noise = optics_instance.get_noise()
382 
383  # Visualize clustering results (clusters and outliers).
384  visualizer = cluster_visualizer()
385  visualizer.append_clusters(clusters, sample)
386  visualizer.append_cluster(noise, sample, marker='x')
387  visualizer.show()
388  @endcode
389 
390  Visualization result of allocated clusters and outliers is presented on the image below:
391  @image html optics_noise_tetra.png "Clusters and outliers extracted by OPTICS algorithm from sample 'Tetra'."
392 
393  """
394 
395  def __init__(self, sample, eps, minpts, amount_clusters=None, ccore=True, **kwargs):
396  """!
397  @brief Constructor of clustering algorithm OPTICS.
398 
399  @param[in] sample (list): Input data that is presented as a list of points (objects), where each point is represented by list or tuple.
400  @param[in] eps (double): Connectivity radius between points, points may be connected if distance between them less than the radius.
401  @param[in] minpts (uint): Minimum number of shared neighbors that is required for establishing links between points.
402  @param[in] amount_clusters (uint): Optional parameter where amount of clusters that should be allocated is specified.
403  In case of usage 'amount_clusters' connectivity radius can be greater than real, in other words, there is place for mistake
404  in connectivity radius usage.
405  @param[in] ccore (bool): if True than DLL CCORE (C++ solution) will be used for solving the problem.
406  @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'data_type').
407 
408  <b>Keyword Args:</b><br>
409  - data_type (string): Data type of input sample 'data' that is processed by the algorithm ('points', 'distance_matrix').
410 
411  """
412 
413  self.__sample_pointer = sample # Algorithm parameter - pointer to sample for processing.
414  self.__eps = eps # Algorithm parameter - connectivity radius between object for establish links between object.
415  self.__minpts = minpts # Algorithm parameter - minimum number of neighbors that is required for establish links between object.
416  self.__amount_clusters = amount_clusters
417 
418  self.__ordering = None
419  self.__clusters = None
420  self.__noise = None
421  self.__optics_objects = None
422 
423  self.__data_type = kwargs.get('data_type', 'points')
424 
425  self.__kdtree = None
426  self.__ccore = ccore
427 
429 
430  if self.__ccore:
431  self.__ccore = ccore_library.workable()
432 
433  self.__verify_arguments()
434 
435 
436  def process(self):
437  """!
438  @brief Performs cluster analysis in line with rules of OPTICS algorithm.
439 
440  @return (optics) Returns itself (OPTICS instance).
441 
442  @see get_clusters()
443  @see get_noise()
444  @see get_ordering()
445 
446  """
447 
448  if self.__ccore is True:
449  self.__process_by_ccore()
450 
451  else:
452  self.__process_by_python()
453 
454  return self
455 
456 
457  def __process_by_ccore(self):
458  """!
459  @brief Performs cluster analysis using CCORE (C/C++ part of pyclustering library).
460 
461  """
462 
463  (self.__clusters, self.__noise, self.__ordering, self.__eps,
464  objects_indexes, objects_core_distances, objects_reachability_distances) = \
465  wrapper.optics(self.__sample_pointer, self.__eps, self.__minpts, self.__amount_clusters, self.__data_type)
466 
467  self.__optics_objects = []
468  for i in range(len(objects_indexes)):
469  if objects_core_distances[i] < 0.0:
470  objects_core_distances[i] = None
471 
472  if objects_reachability_distances[i] < 0.0:
473  objects_reachability_distances[i] = None
474 
475  optics_object = optics_descriptor(objects_indexes[i], objects_core_distances[i], objects_reachability_distances[i])
476  optics_object.processed = True
477 
478  self.__optics_objects.append(optics_object)
479 
480 
481  def __process_by_python(self):
482  """!
483  @brief Performs cluster analysis using python code.
484 
485  """
486 
487  if self.__data_type == 'points':
488  self.__kdtree = kdtree_balanced(self.__sample_pointer, range(len(self.__sample_pointer)))
489 
490  self.__allocate_clusters()
491 
492  if (self.__amount_clusters is not None) and (self.__amount_clusters != len(self.get_clusters())):
493  analyser = ordering_analyser(self.get_ordering())
494  radius, _ = analyser.calculate_connvectivity_radius(self.__amount_clusters)
495  if radius is not None:
496  self.__eps = radius
497  self.__allocate_clusters()
498 
499 
500  def __initialize(self, sample):
501  """!
502  @brief Initializes internal states and resets clustering results in line with input sample.
503 
504  """
505 
506  self.__processed = [False] * len(sample)
507  self.__optics_objects = [optics_descriptor(i) for i in range(len(sample))] # List of OPTICS objects that corresponds to objects from input sample.
508  self.__ordered_database = [] # List of OPTICS objects in traverse order.
509 
510  self.__clusters = None # Result of clustering (list of clusters where each cluster contains indexes of objects from input data).
511  self.__noise = None # Result of clustering (noise).
512 
513 
514  def __allocate_clusters(self):
515  """!
516  @brief Performs cluster allocation and builds ordering diagram that is based on reachability-distances.
517 
518  """
519 
520  self.__initialize(self.__sample_pointer)
521 
522  for optic_object in self.__optics_objects:
523  if optic_object.processed is False:
524  self.__expand_cluster_order(optic_object)
525 
526  self.__extract_clusters()
527 
528 
529  def get_clusters(self):
530  """!
531  @brief Returns list of allocated clusters, where each cluster contains indexes of objects and each cluster is represented by list.
532 
533  @return (list) List of allocated clusters.
534 
535  @see process()
536  @see get_noise()
537  @see get_ordering()
538  @see get_radius()
539 
540  """
541 
542  return self.__clusters
543 
544 
545  def get_noise(self):
546  """!
547  @brief Returns list of noise that contains indexes of objects that corresponds to input data.
548 
549  @return (list) List of allocated noise objects.
550 
551  @see process()
552  @see get_clusters()
553  @see get_ordering()
554  @see get_radius()
555 
556  """
557 
558  return self.__noise
559 
560 
561  def get_ordering(self):
562  """!
563  @brief Returns clustering ordering information about the input data set.
564  @details Clustering ordering of data-set contains the information about the internal clustering structure in line with connectivity radius.
565 
566  @return (ordering_analyser) Analyser of clustering ordering.
567 
568  @see process()
569  @see get_clusters()
570  @see get_noise()
571  @see get_radius()
572  @see get_optics_objects()
573 
574  """
575 
576  if self.__ordering is None:
577  self.__ordering = []
578 
579  for cluster in self.__clusters:
580  for index_object in cluster:
581  optics_object = self.__optics_objects[index_object]
582  if optics_object.reachability_distance is not None:
583  self.__ordering.append(optics_object.reachability_distance)
584 
585  return self.__ordering
586 
587 
589  """!
590  @brief Returns OPTICS objects where each object contains information about index of point from processed data,
591  core distance and reachability distance.
592 
593  @return (list) OPTICS objects.
594 
595  @see get_ordering()
596  @see get_clusters()
597  @see get_noise()
598  @see optics_descriptor
599 
600  """
601 
602  return self.__optics_objects
603 
604 
605  def get_radius(self):
606  """!
607  @brief Returns connectivity radius that is calculated and used for clustering by the algorithm.
608  @details Connectivity radius may be changed only in case of usage additional parameter of the algorithm - amount of clusters for allocation.
609 
610  @return (double) Connectivity radius.
611 
612  @see get_ordering()
613  @see get_clusters()
614  @see get_noise()
615  @see get_optics_objects()
616 
617  """
618 
619  return self.__eps
620 
621 
623  """!
624  @brief Returns clustering result representation type that indicate how clusters are encoded.
625 
626  @return (type_encoding) Clustering result representation.
627 
628  @see get_clusters()
629 
630  """
631 
632  return type_encoding.CLUSTER_INDEX_LIST_SEPARATION
633 
634 
635  def __create_neighbor_searcher(self, data_type):
636  """!
637  @brief Returns neighbor searcher in line with data type.
638 
639  @param[in] data_type (string): Data type (points or distance matrix).
640 
641  """
642  if data_type == 'points':
643  return self.__neighbor_indexes_points
644  elif data_type == 'distance_matrix':
646  else:
647  raise TypeError("Unknown type of data is specified '%s'" % data_type)
648 
649 
650  def __expand_cluster_order(self, optics_object):
651  """!
652  @brief Expand cluster order from not processed optic-object that corresponds to object from input data.
653  Traverse procedure is performed until objects are reachable from core-objects in line with connectivity radius.
654  Order database is updated during expanding.
655 
656  @param[in] optics_object (optics_descriptor): Object that hasn't been processed.
657 
658  """
659 
660  optics_object.processed = True
661 
662  neighbors_descriptor = self.__neighbor_searcher(optics_object)
663  optics_object.reachability_distance = None
664 
665  self.__ordered_database.append(optics_object)
666 
667  # Check core distance
668  if len(neighbors_descriptor) >= self.__minpts:
669  neighbors_descriptor.sort(key = lambda obj: obj[1])
670  optics_object.core_distance = neighbors_descriptor[self.__minpts - 1][1]
671 
672  # Continue processing
673  order_seed = list()
674  self.__update_order_seed(optics_object, neighbors_descriptor, order_seed)
675 
676  while len(order_seed) > 0:
677  optic_descriptor = order_seed[0]
678  order_seed.remove(optic_descriptor)
679 
680  neighbors_descriptor = self.__neighbor_searcher(optic_descriptor)
681  optic_descriptor.processed = True
682 
683  self.__ordered_database.append(optic_descriptor)
684 
685  if len(neighbors_descriptor) >= self.__minpts:
686  neighbors_descriptor.sort(key = lambda obj: obj[1])
687  optic_descriptor.core_distance = neighbors_descriptor[self.__minpts - 1][1]
688 
689  self.__update_order_seed(optic_descriptor, neighbors_descriptor, order_seed)
690  else:
691  optic_descriptor.core_distance = None
692 
693  else:
694  optics_object.core_distance = None
695 
696 
697  def __extract_clusters(self):
698  """!
699  @brief Extract clusters and noise from order database.
700 
701  """
702 
703  self.__clusters = []
704  self.__noise = []
705 
706  current_cluster = self.__noise
707  for optics_object in self.__ordered_database:
708  if (optics_object.reachability_distance is None) or (optics_object.reachability_distance > self.__eps):
709  if (optics_object.core_distance is not None) and (optics_object.core_distance <= self.__eps):
710  self.__clusters.append([ optics_object.index_object ])
711  current_cluster = self.__clusters[-1]
712  else:
713  self.__noise.append(optics_object.index_object)
714  else:
715  current_cluster.append(optics_object.index_object)
716 
717 
718  def __update_order_seed(self, optic_descriptor, neighbors_descriptors, order_seed):
719  """!
720  @brief Update sorted list of reachable objects (from core-object) that should be processed using neighbors of core-object.
721 
722  @param[in] optic_descriptor (optics_descriptor): Core-object whose neighbors should be analysed.
723  @param[in] neighbors_descriptors (list): List of neighbors of core-object.
724  @param[in|out] order_seed (list): List of sorted object in line with reachable distance.
725 
726  """
727 
728  for neighbor_descriptor in neighbors_descriptors:
729  index_neighbor = neighbor_descriptor[0]
730  current_reachable_distance = neighbor_descriptor[1]
731 
732  if self.__optics_objects[index_neighbor].processed is not True:
733  reachable_distance = max(current_reachable_distance, optic_descriptor.core_distance)
734  if self.__optics_objects[index_neighbor].reachability_distance is None:
735  self.__optics_objects[index_neighbor].reachability_distance = reachable_distance
736 
737  # insert element in queue O(n) - worst case.
738  index_insertion = len(order_seed)
739  for index_seed in range(0, len(order_seed)):
740  if reachable_distance < order_seed[index_seed].reachability_distance:
741  index_insertion = index_seed
742  break
743 
744  order_seed.insert(index_insertion, self.__optics_objects[index_neighbor])
745 
746  else:
747  if reachable_distance < self.__optics_objects[index_neighbor].reachability_distance:
748  self.__optics_objects[index_neighbor].reachability_distance = reachable_distance
749  order_seed.sort(key=lambda obj: obj.reachability_distance)
750 
751 
752  def __neighbor_indexes_points(self, optic_object):
753  """!
754  @brief Return neighbors of the specified object in case of sequence of points.
755 
756  @param[in] optic_object (optics_descriptor): Object for which neighbors should be returned in line with connectivity radius.
757 
758  @return (list) List of indexes of neighbors in line the connectivity radius.
759 
760  """
761  kdnodes = self.__kdtree.find_nearest_dist_nodes(self.__sample_pointer[optic_object.index_object], self.__eps)
762  return [[node_tuple[1].payload, math.sqrt(node_tuple[0])] for node_tuple in kdnodes if
763  node_tuple[1].payload != optic_object.index_object]
764 
765 
766  def __neighbor_indexes_distance_matrix(self, optic_object):
767  """!
768  @brief Return neighbors of the specified object in case of distance matrix.
769 
770  @param[in] optic_object (optics_descriptor): Object for which neighbors should be returned in line with connectivity radius.
771 
772  @return (list) List of indexes of neighbors in line the connectivity radius.
773 
774  """
775  distances = self.__sample_pointer[optic_object.index_object]
776  return [[index_neighbor, distances[index_neighbor]] for index_neighbor in range(len(distances))
777  if ((distances[index_neighbor] <= self.__eps) and (index_neighbor != optic_object.index_object))]
778 
779 
780  def __verify_arguments(self):
781  """!
782  @brief Verify input parameters for the algorithm and throw exception in case of incorrectness.
783 
784  """
785  if len(self.__sample_pointer) == 0:
786  raise ValueError("Input data is empty (size: '%d')." % len(self.__sample_pointer))
787 
788  if self.__eps < 0:
789  raise ValueError("Connectivity radius (current value: '%d') should be greater or equal to 0." % self.__eps)
790 
791  if self.__minpts < 0:
792  raise ValueError("Minimum number of neighbors (current value: '%d') should be greater than 0." %
793  self.__minpts)
794 
795  if (self.__amount_clusters is not None) and (self.__amount_clusters <= 0):
796  raise ValueError("Amount of clusters (current value: '%d') should be greater than 0." %
797  self.__amount_clusters)
Object description that used by OPTICS algorithm for cluster analysis.
Definition: optics.py:247
def process(self)
Performs cluster analysis in line with rules of OPTICS algorithm.
Definition: optics.py:436
def __neighbor_indexes_points(self, optic_object)
Return neighbors of the specified object in case of sequence of points.
Definition: optics.py:752
def __len__(self)
Returns length of clustering-ordering diagram.
Definition: optics.py:135
def show_ordering_diagram(analyser, amount_clusters=None)
Display cluster-ordering (reachability-plot) diagram.
Definition: optics.py:58
def __verify_arguments(self)
Verify input parameters for the algorithm and throw exception in case of incorrectness.
Definition: optics.py:780
def __update_order_seed(self, optic_descriptor, neighbors_descriptors, order_seed)
Update sorted list of reachable objects (from core-object) that should be processed using neighbors o...
Definition: optics.py:718
def get_ordering(self)
Returns clustering ordering information about the input data set.
Definition: optics.py:561
Class represents clustering algorithm OPTICS (Ordering Points To Identify Clustering Structure) with ...
Definition: optics.py:284
def get_optics_objects(self)
Returns OPTICS objects where each object contains information about index of point from processed dat...
Definition: optics.py:588
def __repr__(self)
Returns string representation of the optics descriptor.
Definition: optics.py:275
def cluster_ordering(self)
(list) Returns values of dataset cluster ordering.
Definition: optics.py:117
reachability_distance
Reachability distance - the smallest distance to be reachable by core object.
Definition: optics.py:270
Module for representing clustering results.
Definition: encoder.py:1
def __create_neighbor_searcher(self, data_type)
Returns neighbor searcher in line with data type.
Definition: optics.py:635
Colors used by pyclustering library for visualization.
Definition: color.py:1
index_object
Index of object from the input data.
Definition: optics.py:264
processed
True is object has been already traversed.
Definition: optics.py:273
core_distance
Core distance - the smallest distance to reach specified number of neighbors that is not greater then...
Definition: optics.py:267
def __init__(self, index, core_distance=None, reachability_distance=None)
Constructor of object description in optics terms.
Definition: optics.py:253
def __init__(self, sample, eps, minpts, amount_clusters=None, ccore=True, kwargs)
Constructor of clustering algorithm OPTICS.
Definition: optics.py:395
Represents balanced static KD-tree that does not provide services to add and remove nodes after initi...
Definition: kdtree.py:264
def get_clusters(self)
Returns list of allocated clusters, where each cluster contains indexes of objects and each cluster i...
Definition: optics.py:529
def __extract_clusters(self)
Extract clusters and noise from order database.
Definition: optics.py:697
Analyser of cluster ordering diagram.
Definition: optics.py:105
def __initialize(self, sample)
Initializes internal states and resets clustering results in line with input sample.
Definition: optics.py:500
def __allocate_clusters(self)
Performs cluster allocation and builds ordering diagram that is based on reachability-distances.
Definition: optics.py:514
def extract_cluster_amount(self, radius)
Obtains amount of clustering that can be allocated by using specified radius for ordering diagram and...
Definition: optics.py:185
def get_noise(self)
Returns list of noise that contains indexes of objects that corresponds to input data.
Definition: optics.py:545
def calculate_connvectivity_radius(self, amount_clusters, maximum_iterations=100)
Calculates connectivity radius of allocation specified amount of clusters using ordering diagram and ...
Definition: optics.py:143
def __process_by_ccore(self)
Performs cluster analysis using CCORE (C/C++ part of pyclustering library).
Definition: optics.py:457
def __neighbor_indexes_distance_matrix(self, optic_object)
Return neighbors of the specified object in case of distance matrix.
Definition: optics.py:766
def get_cluster_encoding(self)
Returns clustering result representation type that indicate how clusters are encoded.
Definition: optics.py:622
def __process_by_python(self)
Performs cluster analysis using python code.
Definition: optics.py:481
Cluster ordering diagram visualizer that represents dataset graphically as density-based clustering s...
Definition: optics.py:48
def __init__(self, ordering_diagram)
Analyser of ordering diagram that is based on reachability-distances.
Definition: optics.py:125
def get_radius(self)
Returns connectivity radius that is calculated and used for clustering by the algorithm.
Definition: optics.py:605
Data Structure: KD-Tree.
Definition: kdtree.py:1
def __expand_cluster_order(self, optics_object)
Expand cluster order from not processed optic-object that corresponds to object from input data...
Definition: optics.py:650