optics.py
1 """!
2 
3 @brief Cluster analysis algorithm: OPTICS (Ordering Points To Identify Clustering Structure)
4 @details Implementation based on paper @cite article::optics::1.
5 
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2019
8 @copyright GNU Public License
9 
10 @cond GNU_PUBLIC_LICENSE
11  PyClustering is free software: you can redistribute it and/or modify
12  it under the terms of the GNU General Public License as published by
13  the Free Software Foundation, either version 3 of the License, or
14  (at your option) any later version.
15 
16  PyClustering is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  GNU General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23 @endcond
24 
25 """
26 
27 
28 import math
29 import warnings
30 
31 try:
32  import matplotlib.pyplot as plt
33 except Exception as error_instance:
34  warnings.warn("Impossible to import matplotlib (please, install 'matplotlib'), pyclustering's visualization "
35  "functionality is not available (details: '%s')." % str(error_instance))
36 
37 from pyclustering.container.kdtree import kdtree
38 
39 from pyclustering.cluster.encoder import type_encoding
40 
41 from pyclustering.utils.color import color as color_list
42 
43 from pyclustering.core.wrapper import ccore_library
44 
45 import pyclustering.core.optics_wrapper as wrapper
46 
47 
49  """!
50  @brief Cluster ordering diagram visualizer that represents dataset graphically as density-based clustering structure.
51  @details This OPTICS algorithm is KD-tree optimized.
52 
53  @see ordering_analyser
54 
55  """
56 
57  @staticmethod
58  def show_ordering_diagram(analyser, amount_clusters = None):
59  """!
60  @brief Display cluster-ordering (reachability-plot) diagram.
61 
62  @param[in] analyser (ordering_analyser): cluster-ordering analyser whose ordering diagram should be displayed.
63  @param[in] amount_clusters (uint): if it is not 'None' then it displays connectivity radius line that can used for allocation of specified amount of clusters
64  and colorize diagram by corresponding cluster colors.
65 
66  Example demonstrates general abilities of 'ordering_visualizer' class:
67  @code
68  # Display cluster-ordering diagram with connectivity radius is used for allocation of three clusters.
69  ordering_visualizer.show_ordering_diagram(analyser, 3);
70 
71  # Display cluster-ordering diagram without radius.
72  ordering_visualizer.show_ordering_diagram(analyser);
73  @endcode
74 
75  """
76  ordering = analyser.cluster_ordering
77  axis = plt.subplot(111)
78 
79  if amount_clusters is not None:
80  radius, borders = analyser.calculate_connvectivity_radius(amount_clusters)
81 
82  # divide into cluster groups to visualize by colors
83  left_index_border = 0
84  current_index_border = 0
85  for index_border in range(len(borders)):
86  right_index_border = borders[index_border]
87  axis.bar(range(left_index_border, right_index_border), ordering[left_index_border:right_index_border], width = 1.0, color = color_list.TITLES[index_border])
88  left_index_border = right_index_border
89  current_index_border = index_border
90 
91  axis.bar(range(left_index_border, len(ordering)), ordering[left_index_border:len(ordering)], width = 1.0, color = color_list.TITLES[current_index_border + 1])
92 
93  plt.xlim([0, len(ordering)])
94 
95  plt.axhline(y = radius, linewidth = 2, color = 'black')
96  plt.text(0, radius + radius * 0.03, " Radius: " + str(round(radius, 4)) + ";\n Clusters: " + str(amount_clusters), color = 'b', fontsize = 10)
97 
98  else:
99  axis.bar(range(0, len(ordering)), ordering[0:len(ordering)], width = 1.0, color = 'black')
100  plt.xlim([0, len(ordering)])
101 
102  plt.show()
103 
104 
106  """!
107  @brief Analyser of cluster ordering diagram.
108  @details Using cluster-ordering it is able to connectivity radius for allocation of specified amount of clusters and
109  calculate amount of clusters using specified connectivity radius. Cluster-ordering is formed by OPTICS algorithm
110  during cluster analysis.
111 
112  @see optics
113 
114  """
115 
116  @property
117  def cluster_ordering(self):
118  """!
119  @brief (list) Returns values of dataset cluster ordering.
120 
121  """
122  return self.__ordering
123 
124 
125  def __init__(self, ordering_diagram):
126  """!
127  @brief Analyser of ordering diagram that is based on reachability-distances.
128 
129  @see calculate_connvectivity_radius
130 
131  """
132  self.__ordering = ordering_diagram
133 
134 
135  def __len__(self):
136  """!
137  @brief Returns length of clustering-ordering diagram.
138 
139  """
140  return len(self.__ordering)
141 
142 
143  def calculate_connvectivity_radius(self, amount_clusters, maximum_iterations = 100):
144  """!
145  @brief Calculates connectivity radius of allocation specified amount of clusters using ordering diagram and marks borders of clusters using indexes of values of ordering diagram.
146  @details Parameter 'maximum_iterations' is used to protect from hanging when it is impossible to allocate specified number of clusters.
147 
148  @param[in] amount_clusters (uint): amount of clusters that should be allocated by calculated connectivity radius.
149  @param[in] maximum_iterations (uint): maximum number of iteration for searching connectivity radius to allocated specified amount of clusters (by default it is restricted by 100 iterations).
150 
151  @return (double, list) Value of connectivity radius and borders of clusters like (radius, borders), radius may be 'None' as well as borders may be '[]'
152  if connectivity radius hasn't been found for the specified amount of iterations.
153 
154  """
155 
156  maximum_distance = max(self.__ordering)
157 
158  upper_distance = maximum_distance
159  lower_distance = 0.0
160 
161  result = None
162 
163  amount, borders = self.extract_cluster_amount(maximum_distance)
164  if amount <= amount_clusters:
165  for _ in range(maximum_iterations):
166  radius = (lower_distance + upper_distance) / 2.0
167 
168  amount, borders = self.extract_cluster_amount(radius)
169  if amount == amount_clusters:
170  result = radius
171  break
172 
173  elif amount == 0:
174  break
175 
176  elif amount > amount_clusters:
177  lower_distance = radius
178 
179  elif amount < amount_clusters:
180  upper_distance = radius
181 
182  return result, borders
183 
184 
185  def extract_cluster_amount(self, radius):
186  """!
187  @brief Obtains amount of clustering that can be allocated by using specified radius for ordering diagram and borders between them.
188  @details When growth of reachability-distances is detected than it is considered as a start point of cluster,
189  than pick is detected and after that recession is observed until new growth (that means end of the
190  current cluster and start of a new one) or end of diagram.
191 
192  @param[in] radius (double): connectivity radius that is used for cluster allocation.
193 
194  @return (unit, list) Amount of clusters that can be allocated by the connectivity radius on ordering diagram and borders between them using indexes
195  from ordering diagram (amount_clusters, border_clusters).
196 
197  """
198 
199  amount_clusters = 1
200 
201  cluster_start = False
202  cluster_pick = False
203  total_similarity = True
204  previous_cluster_distance = None
205  previous_distance = None
206 
207  cluster_borders = []
208 
209  for index_ordering in range(len(self.__ordering)):
210  distance = self.__ordering[index_ordering]
211  if distance >= radius:
212  if cluster_start is False:
213  cluster_start = True
214  amount_clusters += 1
215 
216  if index_ordering != 0:
217  cluster_borders.append(index_ordering)
218 
219  else:
220  if (distance < previous_cluster_distance) and (cluster_pick is False):
221  cluster_pick = True
222 
223  elif (distance > previous_cluster_distance) and (cluster_pick is True):
224  cluster_pick = False
225  amount_clusters += 1
226 
227  if index_ordering != 0:
228  cluster_borders.append(index_ordering)
229 
230  previous_cluster_distance = distance
231 
232  else:
233  cluster_start = False
234  cluster_pick = False
235 
236  if (previous_distance is not None) and (distance != previous_distance):
237  total_similarity = False
238 
239  previous_distance = distance
240 
241  if (total_similarity is True) and (previous_distance > radius):
242  amount_clusters = 0
243 
244  return amount_clusters, cluster_borders
245 
246 
248  """!
249  @brief Object description that used by OPTICS algorithm for cluster analysis.
250 
251  """
252 
253  def __init__(self, index, core_distance = None, reachability_distance = None):
254  """!
255  @brief Constructor of object description in optics terms.
256 
257  @param[in] index (uint): Index of the object in the data set.
258  @param[in] core_distance (double): Core distance that is minimum distance to specified number of neighbors.
259  @param[in] reachability_distance (double): Reachability distance to this object.
260 
261  """
262 
263 
264  self.index_object = index
265 
266 
267  self.core_distance = core_distance
268 
269 
270  self.reachability_distance = reachability_distance
271 
272 
273  self.processed = False
274 
275  def __repr__(self):
276  """!
277  @brief Returns string representation of the optics descriptor.
278 
279  """
280 
281  return '(%s, [c: %s, r: %s])' % (self.index_object, self.core_distance, self.reachability_distance)
282 
283 
284 class optics:
285  """!
286  @brief Class represents clustering algorithm OPTICS (Ordering Points To Identify Clustering Structure) with KD-tree optimization (ccore options is supported).
287  @details OPTICS is a density-based algorithm. Purpose of the algorithm is to provide explicit clusters, but create clustering-ordering representation of the input data.
288  Clustering-ordering information contains information about internal structures of data set in terms of density and proper connectivity radius can be obtained
289  for allocation required amount of clusters using this diagram. In case of usage additional input parameter 'amount of clusters' connectivity radius should be
290  bigger than real - because it will be calculated by the algorithms if requested amount of clusters is not allocated.
291 
292  @image html optics_example_clustering.png "Scheme how does OPTICS works. At the beginning only one cluster is allocated, but two is requested. At the second step OPTICS calculates connectivity radius using cluster-ordering and performs final cluster allocation."
293 
294  Clustering example using sample 'Chainlink':
295  @code
296  from pyclustering.cluster import cluster_visualizer
297  from pyclustering.cluster.optics import optics, ordering_analyser, ordering_visualizer
298  from pyclustering.samples.definitions import FCPS_SAMPLES
299  from pyclustering.utils import read_sample
300 
301  # Read sample for clustering from some file.
302  sample = read_sample(FCPS_SAMPLES.SAMPLE_CHAINLINK)
303 
304  # Run cluster analysis where connectivity radius is bigger than real.
305  radius = 0.5
306  neighbors = 3
307  optics_instance = optics(sample, radius, neighbors)
308 
309  # Performs cluster analysis.
310  optics_instance.process()
311 
312  # Obtain results of clustering.
313  clusters = optics_instance.get_clusters()
314  noise = optics_instance.get_noise()
315  ordering = optics_instance.get_ordering()
316 
317  # Visualize clustering results.
318  visualizer = cluster_visualizer()
319  visualizer.append_clusters(clusters, sample)
320  visualizer.show()
321 
322  # Display ordering.
323  analyser = ordering_analyser(ordering)
324  ordering_visualizer.show_ordering_diagram(analyser, 2)
325  @endcode
326 
327  Amount of clusters that should be allocated can be also specified. In this case connectivity radius should be greater than real, for example:
328  @code
329  from pyclustering.cluster import cluster_visualizer
330  from pyclustering.cluster.optics import optics, ordering_analyser, ordering_visualizer
331  from pyclustering.samples.definitions import FCPS_SAMPLES
332  from pyclustering.utils import read_sample
333 
334  # Read sample for clustering from some file
335  sample = read_sample(FCPS_SAMPLES.SAMPLE_LSUN)
336 
337  # Run cluster analysis where connectivity radius is bigger than real
338  radius = 2.0
339  neighbors = 3
340  amount_of_clusters = 3
341  optics_instance = optics(sample, radius, neighbors, amount_of_clusters)
342 
343  # Performs cluster analysis
344  optics_instance.process()
345 
346  # Obtain results of clustering
347  clusters = optics_instance.get_clusters()
348  noise = optics_instance.get_noise()
349  ordering = optics_instance.get_ordering()
350 
351  # Visualize ordering diagram
352  analyser = ordering_analyser(ordering)
353  ordering_visualizer.show_ordering_diagram(analyser, amount_of_clusters)
354 
355  # Visualize clustering results
356  visualizer = cluster_visualizer()
357  visualizer.append_clusters(clusters, sample)
358  visualizer.show()
359  @endcode
360 
361  """
362 
363  def __init__(self, sample, eps, minpts, amount_clusters = None, ccore = True, **kwargs):
364  """!
365  @brief Constructor of clustering algorithm OPTICS.
366 
367  @param[in] sample (list): Input data that is presented as a list of points (objects), where each point is represented by list or tuple.
368  @param[in] eps (double): Connectivity radius between points, points may be connected if distance between them less than the radius.
369  @param[in] minpts (uint): Minimum number of shared neighbors that is required for establishing links between points.
370  @param[in] amount_clusters (uint): Optional parameter where amount of clusters that should be allocated is specified.
371  In case of usage 'amount_clusters' connectivity radius can be greater than real, in other words, there is place for mistake
372  in connectivity radius usage.
373  @param[in] ccore (bool): if True than DLL CCORE (C++ solution) will be used for solving the problem.
374  @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'data_type').
375 
376  <b>Keyword Args:</b><br>
377  - data_type (string): Data type of input sample 'data' that is processed by the algorithm ('points', 'distance_matrix').
378 
379  """
380 
381  self.__sample_pointer = sample # Algorithm parameter - pointer to sample for processing.
382  self.__eps = eps # Algorithm parameter - connectivity radius between object for establish links between object.
383  self.__minpts = minpts # Algorithm parameter - minimum number of neighbors that is required for establish links between object.
384  self.__amount_clusters = amount_clusters
385 
386  self.__ordering = None
387  self.__clusters = None
388  self.__noise = None
389  self.__optics_objects = None
390 
391  self.__data_type = kwargs.get('data_type', 'points')
392 
393  self.__kdtree = None
394  self.__ccore = ccore
395 
397 
398  if self.__ccore:
399  self.__ccore = ccore_library.workable()
400 
401 
402  def process(self):
403  """!
404  @brief Performs cluster analysis in line with rules of OPTICS algorithm.
405 
406  @remark Results of clustering can be obtained using corresponding gets methods.
407 
408  @see get_clusters()
409  @see get_noise()
410  @see get_ordering()
411 
412  """
413 
414  if self.__ccore is True:
415  self.__process_by_ccore()
416 
417  else:
418  self.__process_by_python()
419 
420 
421  def __process_by_ccore(self):
422  """!
423  @brief Performs cluster analysis using CCORE (C/C++ part of pyclustering library).
424 
425  """
426 
427  (self.__clusters, self.__noise, self.__ordering, self.__eps,
428  objects_indexes, objects_core_distances, objects_reachability_distances) = \
429  wrapper.optics(self.__sample_pointer, self.__eps, self.__minpts, self.__amount_clusters, self.__data_type)
430 
431  self.__optics_objects = []
432  for i in range(len(objects_indexes)):
433  if objects_core_distances[i] < 0.0:
434  objects_core_distances[i] = None
435 
436  if objects_reachability_distances[i] < 0.0:
437  objects_reachability_distances[i] = None
438 
439  optics_object = optics_descriptor(objects_indexes[i], objects_core_distances[i], objects_reachability_distances[i])
440  optics_object.processed = True
441 
442  self.__optics_objects.append(optics_object)
443 
444 
445  def __process_by_python(self):
446  """!
447  @brief Performs cluster analysis using python code.
448 
449  """
450 
451  if self.__data_type == 'points':
452  self.__kdtree = kdtree(self.__sample_pointer, range(len(self.__sample_pointer)))
453 
454  self.__allocate_clusters()
455 
456  if (self.__amount_clusters is not None) and (self.__amount_clusters != len(self.get_clusters())):
457  analyser = ordering_analyser(self.get_ordering())
458  radius, _ = analyser.calculate_connvectivity_radius(self.__amount_clusters)
459  if radius is not None:
460  self.__eps = radius
461  self.__allocate_clusters()
462 
463 
464  def __initialize(self, sample):
465  """!
466  @brief Initializes internal states and resets clustering results in line with input sample.
467 
468  """
469 
470  self.__processed = [False] * len(sample)
471  self.__optics_objects = [optics_descriptor(i) for i in range(len(sample))] # List of OPTICS objects that corresponds to objects from input sample.
472  self.__ordered_database = [] # List of OPTICS objects in traverse order.
473 
474  self.__clusters = None # Result of clustering (list of clusters where each cluster contains indexes of objects from input data).
475  self.__noise = None # Result of clustering (noise).
476 
477 
478  def __allocate_clusters(self):
479  """!
480  @brief Performs cluster allocation and builds ordering diagram that is based on reachability-distances.
481 
482  """
483 
484  self.__initialize(self.__sample_pointer)
485 
486  for optic_object in self.__optics_objects:
487  if optic_object.processed is False:
488  self.__expand_cluster_order(optic_object)
489 
490  self.__extract_clusters()
491 
492 
493  def get_clusters(self):
494  """!
495  @brief Returns list of allocated clusters, where each cluster contains indexes of objects and each cluster is represented by list.
496 
497  @return (list) List of allocated clusters.
498 
499  @see process()
500  @see get_noise()
501  @see get_ordering()
502  @see get_radius()
503 
504  """
505 
506  return self.__clusters
507 
508 
509  def get_noise(self):
510  """!
511  @brief Returns list of noise that contains indexes of objects that corresponds to input data.
512 
513  @return (list) List of allocated noise objects.
514 
515  @see process()
516  @see get_clusters()
517  @see get_ordering()
518  @see get_radius()
519 
520  """
521 
522  return self.__noise
523 
524 
525  def get_ordering(self):
526  """!
527  @brief Returns clustering ordering information about the input data set.
528  @details Clustering ordering of data-set contains the information about the internal clustering structure in line with connectivity radius.
529 
530  @return (ordering_analyser) Analyser of clustering ordering.
531 
532  @see process()
533  @see get_clusters()
534  @see get_noise()
535  @see get_radius()
536  @see get_optics_objects()
537 
538  """
539 
540  if self.__ordering is None:
541  self.__ordering = []
542 
543  for cluster in self.__clusters:
544  for index_object in cluster:
545  optics_object = self.__optics_objects[index_object]
546  if optics_object.reachability_distance is not None:
547  self.__ordering.append(optics_object.reachability_distance)
548 
549  return self.__ordering
550 
551 
553  """!
554  @brief Returns OPTICS objects where each object contains information about index of point from processed data,
555  core distance and reachability distance.
556 
557  @return (list) OPTICS objects.
558 
559  @see get_ordering()
560  @see get_clusters()
561  @see get_noise()
562  @see optics_descriptor
563 
564  """
565 
566  return self.__optics_objects
567 
568 
569  def get_radius(self):
570  """!
571  @brief Returns connectivity radius that is calculated and used for clustering by the algorithm.
572  @details Connectivity radius may be changed only in case of usage additional parameter of the algorithm - amount of clusters for allocation.
573 
574  @return (double) Connectivity radius.
575 
576  @see get_ordering()
577  @see get_clusters()
578  @see get_noise()
579  @see get_optics_objects()
580 
581  """
582 
583  return self.__eps
584 
585 
587  """!
588  @brief Returns clustering result representation type that indicate how clusters are encoded.
589 
590  @return (type_encoding) Clustering result representation.
591 
592  @see get_clusters()
593 
594  """
595 
596  return type_encoding.CLUSTER_INDEX_LIST_SEPARATION
597 
598 
599  def __create_neighbor_searcher(self, data_type):
600  """!
601  @brief Returns neighbor searcher in line with data type.
602 
603  @param[in] data_type (string): Data type (points or distance matrix).
604 
605  """
606  if data_type == 'points':
607  return self.__neighbor_indexes_points
608  elif data_type == 'distance_matrix':
610  else:
611  raise TypeError("Unknown type of data is specified '%s'" % data_type)
612 
613 
614  def __expand_cluster_order(self, optics_object):
615  """!
616  @brief Expand cluster order from not processed optic-object that corresponds to object from input data.
617  Traverse procedure is performed until objects are reachable from core-objects in line with connectivity radius.
618  Order database is updated during expanding.
619 
620  @param[in] optics_object (optics_descriptor): Object that hasn't been processed.
621 
622  """
623 
624  optics_object.processed = True
625 
626  neighbors_descriptor = self.__neighbor_searcher(optics_object)
627  optics_object.reachability_distance = None
628 
629  self.__ordered_database.append(optics_object)
630 
631  # Check core distance
632  if len(neighbors_descriptor) >= self.__minpts:
633  neighbors_descriptor.sort(key = lambda obj: obj[1])
634  optics_object.core_distance = neighbors_descriptor[self.__minpts - 1][1]
635 
636  # Continue processing
637  order_seed = list()
638  self.__update_order_seed(optics_object, neighbors_descriptor, order_seed)
639 
640  while len(order_seed) > 0:
641  optic_descriptor = order_seed[0]
642  order_seed.remove(optic_descriptor)
643 
644  neighbors_descriptor = self.__neighbor_searcher(optic_descriptor)
645  optic_descriptor.processed = True
646 
647  self.__ordered_database.append(optic_descriptor)
648 
649  if len(neighbors_descriptor) >= self.__minpts:
650  neighbors_descriptor.sort(key = lambda obj: obj[1])
651  optic_descriptor.core_distance = neighbors_descriptor[self.__minpts - 1][1]
652 
653  self.__update_order_seed(optic_descriptor, neighbors_descriptor, order_seed)
654  else:
655  optic_descriptor.core_distance = None
656 
657  else:
658  optics_object.core_distance = None
659 
660 
661  def __extract_clusters(self):
662  """!
663  @brief Extract clusters and noise from order database.
664 
665  """
666 
667  self.__clusters = []
668  self.__noise = []
669 
670  current_cluster = self.__noise
671  for optics_object in self.__ordered_database:
672  if (optics_object.reachability_distance is None) or (optics_object.reachability_distance > self.__eps):
673  if (optics_object.core_distance is not None) and (optics_object.core_distance <= self.__eps):
674  self.__clusters.append([ optics_object.index_object ])
675  current_cluster = self.__clusters[-1]
676  else:
677  self.__noise.append(optics_object.index_object)
678  else:
679  current_cluster.append(optics_object.index_object)
680 
681 
682  def __update_order_seed(self, optic_descriptor, neighbors_descriptors, order_seed):
683  """!
684  @brief Update sorted list of reachable objects (from core-object) that should be processed using neighbors of core-object.
685 
686  @param[in] optic_descriptor (optics_descriptor): Core-object whose neighbors should be analysed.
687  @param[in] neighbors_descriptors (list): List of neighbors of core-object.
688  @param[in|out] order_seed (list): List of sorted object in line with reachable distance.
689 
690  """
691 
692  for neighbor_descriptor in neighbors_descriptors:
693  index_neighbor = neighbor_descriptor[0]
694  current_reachable_distance = neighbor_descriptor[1]
695 
696  if self.__optics_objects[index_neighbor].processed is not True:
697  reachable_distance = max(current_reachable_distance, optic_descriptor.core_distance)
698  if self.__optics_objects[index_neighbor].reachability_distance is None:
699  self.__optics_objects[index_neighbor].reachability_distance = reachable_distance
700 
701  # insert element in queue O(n) - worst case.
702  index_insertion = len(order_seed)
703  for index_seed in range(0, len(order_seed)):
704  if reachable_distance < order_seed[index_seed].reachability_distance:
705  index_insertion = index_seed
706  break
707 
708  order_seed.insert(index_insertion, self.__optics_objects[index_neighbor])
709 
710  else:
711  if reachable_distance < self.__optics_objects[index_neighbor].reachability_distance:
712  self.__optics_objects[index_neighbor].reachability_distance = reachable_distance
713  order_seed.sort(key = lambda obj: obj.reachability_distance)
714 
715 
716  def __neighbor_indexes_points(self, optic_object):
717  """!
718  @brief Return neighbors of the specified object in case of sequence of points.
719 
720  @param[in] optic_object (optics_descriptor): Object for which neighbors should be returned in line with connectivity radius.
721 
722  @return (list) List of indexes of neighbors in line the connectivity radius.
723 
724  """
725  kdnodes = self.__kdtree.find_nearest_dist_nodes(self.__sample_pointer[optic_object.index_object], self.__eps)
726  return [[node_tuple[1].payload, math.sqrt(node_tuple[0])] for node_tuple in kdnodes if
727  node_tuple[1].payload != optic_object.index_object]
728 
729 
730  def __neighbor_indexes_distance_matrix(self, optic_object):
731  """!
732  @brief Return neighbors of the specified object in case of distance matrix.
733 
734  @param[in] optic_object (optics_descriptor): Object for which neighbors should be returned in line with connectivity radius.
735 
736  @return (list) List of indexes of neighbors in line the connectivity radius.
737 
738  """
739  distances = self.__sample_pointer[optic_object.index_object]
740  return [[index_neighbor, distances[index_neighbor]] for index_neighbor in range(len(distances))
741  if ((distances[index_neighbor] <= self.__eps) and (index_neighbor != optic_object.index_object))]
Object description that used by OPTICS algorithm for cluster analysis.
Definition: optics.py:247
def process(self)
Performs cluster analysis in line with rules of OPTICS algorithm.
Definition: optics.py:402
def __neighbor_indexes_points(self, optic_object)
Return neighbors of the specified object in case of sequence of points.
Definition: optics.py:716
def __len__(self)
Returns length of clustering-ordering diagram.
Definition: optics.py:135
def show_ordering_diagram(analyser, amount_clusters=None)
Display cluster-ordering (reachability-plot) diagram.
Definition: optics.py:58
def __update_order_seed(self, optic_descriptor, neighbors_descriptors, order_seed)
Update sorted list of reachable objects (from core-object) that should be processed using neighbors o...
Definition: optics.py:682
def get_ordering(self)
Returns clustering ordering information about the input data set.
Definition: optics.py:525
Class represents clustering algorithm OPTICS (Ordering Points To Identify Clustering Structure) with ...
Definition: optics.py:284
def get_optics_objects(self)
Returns OPTICS objects where each object contains information about index of point from processed dat...
Definition: optics.py:552
def __repr__(self)
Returns string representation of the optics descriptor.
Definition: optics.py:275
def cluster_ordering(self)
(list) Returns values of dataset cluster ordering.
Definition: optics.py:117
reachability_distance
Reachability distance - the smallest distance to be reachable by core object.
Definition: optics.py:270
Module for representing clustering results.
Definition: encoder.py:1
Represents KD Tree that is a space-partitioning data structure for organizing points in a k-dimension...
Definition: kdtree.py:157
def __create_neighbor_searcher(self, data_type)
Returns neighbor searcher in line with data type.
Definition: optics.py:599
Colors used by pyclustering library for visualization.
Definition: color.py:1
index_object
Index of object from the input data.
Definition: optics.py:264
processed
True is object has been already traversed.
Definition: optics.py:273
core_distance
Core distance - the smallest distance to reach specified number of neighbors that is not greater then...
Definition: optics.py:267
def __init__(self, index, core_distance=None, reachability_distance=None)
Constructor of object description in optics terms.
Definition: optics.py:253
def __init__(self, sample, eps, minpts, amount_clusters=None, ccore=True, kwargs)
Constructor of clustering algorithm OPTICS.
Definition: optics.py:363
def get_clusters(self)
Returns list of allocated clusters, where each cluster contains indexes of objects and each cluster i...
Definition: optics.py:493
def __extract_clusters(self)
Extract clusters and noise from order database.
Definition: optics.py:661
Analyser of cluster ordering diagram.
Definition: optics.py:105
def __initialize(self, sample)
Initializes internal states and resets clustering results in line with input sample.
Definition: optics.py:464
def __allocate_clusters(self)
Performs cluster allocation and builds ordering diagram that is based on reachability-distances.
Definition: optics.py:478
def extract_cluster_amount(self, radius)
Obtains amount of clustering that can be allocated by using specified radius for ordering diagram and...
Definition: optics.py:185
def get_noise(self)
Returns list of noise that contains indexes of objects that corresponds to input data.
Definition: optics.py:509
def calculate_connvectivity_radius(self, amount_clusters, maximum_iterations=100)
Calculates connectivity radius of allocation specified amount of clusters using ordering diagram and ...
Definition: optics.py:143
def __process_by_ccore(self)
Performs cluster analysis using CCORE (C/C++ part of pyclustering library).
Definition: optics.py:421
def __neighbor_indexes_distance_matrix(self, optic_object)
Return neighbors of the specified object in case of distance matrix.
Definition: optics.py:730
def get_cluster_encoding(self)
Returns clustering result representation type that indicate how clusters are encoded.
Definition: optics.py:586
def __process_by_python(self)
Performs cluster analysis using python code.
Definition: optics.py:445
Cluster ordering diagram visualizer that represents dataset graphically as density-based clustering s...
Definition: optics.py:48
def __init__(self, ordering_diagram)
Analyser of ordering diagram that is based on reachability-distances.
Definition: optics.py:125
def get_radius(self)
Returns connectivity radius that is calculated and used for clustering by the algorithm.
Definition: optics.py:569
Data Structure: KD-Tree.
Definition: kdtree.py:1
def __expand_cluster_order(self, optics_object)
Expand cluster order from not processed optic-object that corresponds to object from input data...
Definition: optics.py:614