kdtree.py
1 """!
2 
3 @brief Data Structure: KD-Tree
4 @details Implementation based on paper @cite book::the_design_and_analysis.
5 
6 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @date 2014-2018
8 @copyright GNU Public License
9 
10 @cond GNU_PUBLIC_LICENSE
11  PyClustering is free software: you can redistribute it and/or modify
12  it under the terms of the GNU General Public License as published by
13  the Free Software Foundation, either version 3 of the License, or
14  (at your option) any later version.
15 
16  PyClustering is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  GNU General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23 @endcond
24 
25 """
26 
27 
28 import numpy
29 
30 from pyclustering.utils import euclidean_distance_square
31 
32 
34  """!
35  @brief KD-tree text visualizer that provides service to diplay tree structure using text representation.
36 
37  """
38 
39  def __init__(self, kdtree_instance):
40  """!
41  @brief Initialize KD-tree text visualizer.
42 
43  @param[in] kdtree_instance (kdtree): Instance of KD-Tree that should be visualized.
44 
45  """
46  self.__kdtree_instance = kdtree_instance
47 
48  self.__tree_level_text = ""
49  self.__tree_text = ""
50 
51 
52  def visualize(self, display=True):
53  """!
54  @brief Display KD-tree to console.
55 
56  @param[in] display (bool): If 'True' then tree will be shown in console.
57 
58  @return (string) Text representation of the KD-tree.
59 
60  """
61 
62  kdnodes = self.__get_nodes()
63  level = kdnodes[0]
64 
65  for kdnode in kdnodes:
66  self.__print_node(level, kdnode)
67 
68  self.__tree_text += self.__tree_level_text
69  if display is True:
70  print(self.__tree_text)
71 
72  return self.__tree_text
73 
74 
75  def __print_node(self, level, kdnode):
76  if level == kdnode[0]:
77  self.__tree_level_text += str(kdnode[1]) + "\t"
78 
79  else:
80  self.__tree_text += self.__tree_level_text + "\n"
81  level = kdnode[0]
82  self.__tree_level_text = str(kdnode[1]) + "\t"
83 
84 
85  def __get_nodes(self):
86  kdnodes = self.__kdtree_instance.traverse()
87  if kdnodes == []:
88  return
89 
90  kdnodes.sort(key = lambda item: item[0])
91  return kdnodes
92 
93 
94 
95 class node:
96  """!
97  @brief Represents node of KD-Tree.
98 
99  """
100 
101  def __init__(self, data=None, payload=None, left=None, right=None, disc=None, parent=None):
102  """!
103  @brief
104 
105  @param[in] data (list): Data point that is presented as list of coodinates.
106  @param[in] payload (*): Payload of node (pointer to essense that is attached to this node).
107  @param[in] left (node): Node of KD-Tree that is represented left successor.
108  @param[in] right (node): Node of KD-Tree that is represented right successor.
109  @param[in] disc (uint): Index of dimension of that node.
110  @param[in] parent (node): Node of KD-Tree that is represented parent.
111 
112  """
113 
114 
115  self.data = data
116 
117 
118  self.payload = payload
119 
120 
121  self.left = left
122 
123 
124  self.right = right
125 
126 
127  self.disc = disc
128 
129 
130  self.parent = parent
131 
132 
133  def __repr__(self):
134  """!
135  @return (string) Default representation of the node.
136 
137  """
138  left = None
139  right = None
140 
141  if self.left is not None:
142  left = self.left.data
143 
144  if self.right is not None:
145  right = self.right.data
146 
147  return "(%s: [L:'%s', R:'%s'])" % (self.data, left, right)
148 
149  def __str__(self):
150  """!
151  @return (string) String representation of the node.
152 
153  """
154  return self.__repr__()
155 
156 
157 class kdtree:
158  """!
159  @brief Represents KD Tree that is a space-partitioning data structure for organizing points in a k-dimensional space.
160  @details In the term k-d tree, k denotes the dimensionality of the space being represented. Each data point is represented
161  as a node in the k-d tree in the form of a record of type node.
162 
163  Examples:
164  @code
165  # Import required modules
166  from pyclustering.samples.definitions import SIMPLE_SAMPLES;
167  from pyclustering.container.kdtree import kdtree;
168  from pyclustering.utils import read_sample;
169 
170  # Read data from text file
171  sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3);
172 
173  # Create instance of KD-tree and initialize (fill) it by read data.
174  tree_instance = kdtree(sample);
175 
176  # Search for nearest point
177  search_distance = 0.3;
178  nearest_node = tree_instance.find_nearest_dist_node([1.12, 4.31], search_distance);
179 
180  # Search for nearest point in radius 0.3
181  nearest_nodes = tree_instance.find_nearest_dist_nodes([1.12, 4.31], search_distance);
182  print("Nearest nodes:", nearest_nodes);
183  @endcode
184 
185  """
186 
187  def __init__(self, data_list = None, payload_list = None):
188  """!
189  @brief Create kd-tree from list of points and from according list of payloads.
190  @details If lists were not specified then empty kd-tree will be created.
191 
192  @param[in] data_list (list): Insert points from the list to created KD tree.
193  @param[in] payload_list (list): Insert payload from the list to created KD tree, length should be equal to length of data_list if it is specified.
194 
195  """
196 
197  self.__root = None
198  self.__dimension = None
199  self.__point_comparator = None
200 
201  self.__fill_tree(data_list, payload_list)
202 
203 
204  def insert(self, point, payload):
205  """!
206  @brief Insert new point with payload to kd-tree.
207 
208  @param[in] point (list): Coordinates of the point of inserted node.
209  @param[in] payload (any-type): Payload of inserted node. It can be identificator of the node or
210  some useful payload that belongs to the point.
211 
212  @return (node) Inserted node to the kd-tree.
213 
214  """
215 
216  if self.__root is None:
217  self.__dimension = len(point)
218  self.__root = node(point, payload, None, None, 0)
219  self.__point_comparator = self.__create_point_comparator(type(point))
220  return self.__root
221 
222  cur_node = self.__root
223 
224  while True:
225  if cur_node.data[cur_node.disc] <= point[cur_node.disc]:
226  # If new node is greater or equal than current node then check right leaf
227  if cur_node.right is None:
228  discriminator = cur_node.disc + 1
229  if discriminator >= self.__dimension:
230  discriminator = 0
231 
232  cur_node.right = node(point, payload, None, None, discriminator, cur_node)
233  return cur_node.right
234  else:
235  cur_node = cur_node.right
236 
237  else:
238  # If new node is less than current then check left leaf
239  if cur_node.left is None:
240  discriminator = cur_node.disc + 1
241  if discriminator >= self.__dimension:
242  discriminator = 0
243 
244  cur_node.left = node(point, payload, None, None, discriminator, cur_node)
245  return cur_node.left
246  else:
247  cur_node = cur_node.left
248 
249 
250  def remove(self, point, **kwargs):
251  """!
252  @brief Remove specified point from kd-tree.
253  @details It removes the first found node that satisfy to the input parameters. Make sure that
254  pair (point, payload) is unique for each node, othewise the first found is removed.
255 
256  @param[in] point (list): Coordinates of the point of removed node.
257  @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'payload').
258 
259  <b>Keyword Args:</b><br>
260  - payload (any): Payload of the node that should be removed.
261 
262  @return (node) Root if node has been successfully removed, otherwise None.
263 
264  """
265 
266  # Get required node
267  node_for_remove = None
268  if 'payload' in kwargs:
269  node_for_remove = self.find_node_with_payload(point, kwargs['payload'], None)
270  else:
271  node_for_remove = self.find_node(point, None)
272 
273  if node_for_remove is None:
274  return None
275 
276  parent = node_for_remove.parent
277  minimal_node = self.__recursive_remove(node_for_remove)
278  if parent is None:
279  self.__root = minimal_node
280 
281  # If all k-d tree was destroyed
282  if minimal_node is not None:
283  minimal_node.parent = None
284  else:
285  if parent.left is node_for_remove:
286  parent.left = minimal_node
287  elif parent.right is node_for_remove:
288  parent.right = minimal_node
289 
290  return self.__root
291 
292 
293  def __recursive_remove(self, node_removed):
294  """!
295  @brief Delete node and return root of subtree.
296 
297  @param[in] node_removed (node): Node that should be removed.
298 
299  @return (node) Minimal node in line with coordinate that is defined by descriminator.
300 
301  """
302 
303  # Check if it is leaf
304  if (node_removed.right is None) and (node_removed.left is None):
305  return None
306 
307  discriminator = node_removed.disc
308 
309  # Check if only left branch exist
310  if node_removed.right is None:
311  node_removed.right = node_removed.left
312  node_removed.left = None
313 
314  # Find minimal node in line with coordinate that is defined by discriminator
315  minimal_node = self.find_minimal_node(node_removed.right, discriminator)
316  parent = minimal_node.parent
317 
318  if parent.left is minimal_node:
319  parent.left = self.__recursive_remove(minimal_node)
320  elif parent.right is minimal_node:
321  parent.right = self.__recursive_remove(minimal_node)
322 
323  minimal_node.parent = node_removed.parent
324  minimal_node.disc = node_removed.disc
325  minimal_node.right = node_removed.right
326  minimal_node.left = node_removed.left
327 
328  # Update parent for successors of previous parent.
329  if minimal_node.right is not None:
330  minimal_node.right.parent = minimal_node
331 
332  if minimal_node.left is not None:
333  minimal_node.left.parent = minimal_node
334 
335  return minimal_node
336 
337 
338  def find_minimal_node(self, node_head, discriminator):
339  """!
340  @brief Find minimal node in line with coordinate that is defined by discriminator.
341 
342  @param[in] node_head (node): Node of KD tree from that search should be started.
343  @param[in] discriminator (uint): Coordinate number that is used for comparison.
344 
345  @return (node) Minimal node in line with descriminator from the specified node.
346 
347  """
348 
349  min_key = lambda cur_node: cur_node.data[discriminator]
350  stack = []
351  candidates = []
352  isFinished = False
353  while isFinished is False:
354  if node_head is not None:
355  stack.append(node_head)
356  node_head = node_head.left
357  else:
358  if len(stack) != 0:
359  node_head = stack.pop()
360  candidates.append(node_head)
361  node_head = node_head.right
362  else:
363  isFinished = True
364 
365  return min(candidates, key = min_key)
366 
367 
368  def __fill_tree(self, data_list, payload_list):
369  """!
370  @brief Fill KD-tree by specified data and create point comparator in line with data type.
371 
372  @param[in] data_list (array_like): Data points that should be inserted to the tree.
373  @param[in] payload_list (array_like): Data point payloads that follows data points inserted to the tree.
374 
375  """
376  if data_list is None or len(data_list) == 0:
377  return # Just return from here, tree can be filled by insert method later
378 
379  if payload_list is None:
380  # Case when payload is not specified.
381  for index in range(0, len(data_list)):
382  self.insert(data_list[index], None)
383  else:
384  # Case when payload is specified.
385  for index in range(0, len(data_list)):
386  self.insert(data_list[index], payload_list[index])
387 
388  self.__point_comparator = self.__create_point_comparator(type(self.__root.data))
389 
390 
391  def __create_point_comparator(self, type_point):
392  """!
393  @brief Create point comparator.
394  @details In case of numpy.array specific comparator is required.
395 
396  @param[in] type_point (data_type): Type of point that is stored in KD-node.
397 
398  @return (callable) Callable point comparator to compare to points.
399 
400  """
401  if type_point == numpy.ndarray:
402  return lambda obj1, obj2: numpy.array_equal(obj1, obj2)
403 
404  return lambda obj1, obj2: obj1 == obj2
405 
406 
407  def __find_node_by_rule(self, point, search_rule, cur_node):
408  """!
409  @brief Search node that satisfy to parameters in search rule.
410  @details If node with specified parameters does not exist then None will be returned,
411  otherwise required node will be returned.
412 
413  @param[in] point (list): Coordinates of the point whose node should be found.
414  @param[in] search_rule (lambda): Rule that is called to check whether node satisfies to search parameter.
415  @param[in] cur_node (node): Node from which search should be started.
416 
417  @return (node) Node if it satisfies to input parameters, otherwise it return None.
418 
419  """
420 
421  req_node = None
422 
423  if cur_node is None:
424  cur_node = self.__root
425 
426  while cur_node:
427  if cur_node.data[cur_node.disc] <= point[cur_node.disc]:
428  # Check if it's required node
429  if search_rule(cur_node):
430  req_node = cur_node
431  break
432 
433  cur_node = cur_node.right
434 
435  else:
436  cur_node = cur_node.left
437 
438  return req_node
439 
440 
441  def find_node_with_payload(self, point, point_payload, cur_node = None):
442  """!
443  @brief Find node with specified coordinates and payload.
444  @details If node with specified parameters does not exist then None will be returned,
445  otherwise required node will be returned.
446 
447  @param[in] point (list): Coordinates of the point whose node should be found.
448  @param[in] point_payload (any): Payload of the node that is searched in the tree.
449  @param[in] cur_node (node): Node from which search should be started.
450 
451  @return (node) Node if it satisfies to input parameters, otherwise it return None.
452 
453  """
454 
455  rule_search = lambda node, point=point, payload=point_payload: self.__point_comparator(node.data, point) and node.payload == payload
456  return self.__find_node_by_rule(point, rule_search, cur_node)
457 
458 
459  def find_node(self, point, cur_node = None):
460  """!
461  @brief Find node with coordinates that are defined by specified point.
462  @details If node with specified parameters does not exist then None will be returned,
463  otherwise required node will be returned.
464 
465  @param[in] point (list): Coordinates of the point whose node should be found.
466  @param[in] cur_node (node): Node from which search should be started.
467 
468  @return (node) Node if it satisfies to input parameters, otherwise it return None.
469 
470  """
471 
472  rule_search = lambda node, point=point: self.__point_comparator(node.data, point)
473  return self.__find_node_by_rule(point, rule_search, cur_node)
474 
475 
476  def find_nearest_dist_node(self, point, distance, retdistance = False):
477  """!
478  @brief Find nearest neighbor in area with radius = distance.
479 
480  @param[in] point (list): Maximum distance where neighbors are searched.
481  @param[in] distance (double): Maximum distance where neighbors are searched.
482  @param[in] retdistance (bool): If True - returns neighbors with distances to them, otherwise only neighbors is returned.
483 
484  @return (node|list) Nearest neighbor if 'retdistance' is False and list with two elements [node, distance] if 'retdistance' is True,
485  where the first element is pointer to node and the second element is distance to it.
486 
487  """
488 
489  best_nodes = self.find_nearest_dist_nodes(point, distance)
490 
491  if best_nodes == []:
492  return None
493 
494  nearest = min(best_nodes, key = lambda item: item[0])
495 
496  if retdistance is True:
497  return nearest
498  else:
499  return nearest[1]
500 
501 
502  def find_nearest_dist_nodes(self, point, distance):
503  """!
504  @brief Find neighbors that are located in area that is covered by specified distance.
505 
506  @param[in] point (list): Coordinates that is considered as centroind for searching.
507  @param[in] distance (double): Distance from the center where seaching is performed.
508 
509  @return (list) Neighbors in area that is specified by point (center) and distance (radius).
510 
511  """
512 
513  best_nodes = []
514  if self.__root is not None:
515  self.__recursive_nearest_nodes(point, distance, distance * distance, self.__root, best_nodes)
516 
517  return best_nodes
518 
519 
520  def __recursive_nearest_nodes(self, point, distance, sqrt_distance, node_head, best_nodes):
521  """!
522  @brief Returns list of neighbors such as tuple (distance, node) that is located in area that is covered by distance.
523 
524  @param[in] point (list): Coordinates that is considered as centroind for searching
525  @param[in] distance (double): Distance from the center where seaching is performed.
526  @param[in] sqrt_distance (double): Square distance from the center where searching is performed.
527  @param[in] node_head (node): Node from that searching is performed.
528  @param[in|out] best_nodes (list): List of founded nodes.
529 
530  """
531 
532  if node_head.right is not None:
533  minimum = node_head.data[node_head.disc] - distance
534  if point[node_head.disc] >= minimum:
535  self.__recursive_nearest_nodes(point, distance, sqrt_distance, node_head.right, best_nodes)
536 
537  if node_head.left is not None:
538  maximum = node_head.data[node_head.disc] + distance
539  if point[node_head.disc] < maximum:
540  self.__recursive_nearest_nodes(point, distance, sqrt_distance, node_head.left, best_nodes)
541 
542  candidate_distance = euclidean_distance_square(point, node_head.data)
543  if candidate_distance <= sqrt_distance:
544  best_nodes.append( (candidate_distance, node_head) )
545 
546 
547  def children(self, node_parent):
548  """!
549  @brief Returns list of children of node.
550 
551  @param[in] node_parent (node): Node whose children are required.
552 
553  @return (list) Children of node. If node haven't got any child then None is returned.
554 
555  """
556 
557  if node_parent.left is not None:
558  yield node_parent.left
559  if node_parent.right is not None:
560  yield node_parent.right
561 
562 
563  def traverse(self, start_node = None, level = None):
564  """!
565  @brief Traverses all nodes of subtree that is defined by node specified in input parameter.
566 
567  @param[in] start_node (node): Node from that travering of subtree is performed.
568  @param[in, out] level (uint): Should be ignored by application.
569 
570  @return (list) All nodes of the subtree.
571 
572  """
573 
574  if start_node is None:
575  start_node = self.__root
576  level = 0
577 
578  if start_node is None:
579  return []
580 
581  items = [ (level, start_node) ]
582  for child in self.children(start_node):
583  if child is not None:
584  items += self.traverse(child, level + 1)
585 
586  return items
def find_minimal_node(self, node_head, discriminator)
Find minimal node in line with coordinate that is defined by discriminator.
Definition: kdtree.py:338
def find_node(self, point, cur_node=None)
Find node with coordinates that are defined by specified point.
Definition: kdtree.py:459
left
Left node successor of the node.
Definition: kdtree.py:121
def traverse(self, start_node=None, level=None)
Traverses all nodes of subtree that is defined by node specified in input parameter.
Definition: kdtree.py:563
def __recursive_remove(self, node_removed)
Delete node and return root of subtree.
Definition: kdtree.py:293
KD-tree text visualizer that provides service to diplay tree structure using text representation...
Definition: kdtree.py:33
parent
Parent node of the node.
Definition: kdtree.py:130
def __find_node_by_rule(self, point, search_rule, cur_node)
Search node that satisfy to parameters in search rule.
Definition: kdtree.py:407
right
Right node successor of the node.
Definition: kdtree.py:124
Utils that are used by modules of pyclustering.
Definition: __init__.py:1
def __recursive_nearest_nodes(self, point, distance, sqrt_distance, node_head, best_nodes)
Returns list of neighbors such as tuple (distance, node) that is located in area that is covered by d...
Definition: kdtree.py:520
def remove(self, point, kwargs)
Remove specified point from kd-tree.
Definition: kdtree.py:250
Represents KD Tree that is a space-partitioning data structure for organizing points in a k-dimension...
Definition: kdtree.py:157
def __init__(self, data_list=None, payload_list=None)
Create kd-tree from list of points and from according list of payloads.
Definition: kdtree.py:187
def __init__(self, kdtree_instance)
Initialize KD-tree text visualizer.
Definition: kdtree.py:39
Represents node of KD-Tree.
Definition: kdtree.py:95
def find_node_with_payload(self, point, point_payload, cur_node=None)
Find node with specified coordinates and payload.
Definition: kdtree.py:441
def find_nearest_dist_node(self, point, distance, retdistance=False)
Find nearest neighbor in area with radius = distance.
Definition: kdtree.py:476
def __fill_tree(self, data_list, payload_list)
Fill KD-tree by specified data and create point comparator in line with data type.
Definition: kdtree.py:368
disc
Index of dimension.
Definition: kdtree.py:127
def children(self, node_parent)
Returns list of children of node.
Definition: kdtree.py:547
payload
Payload of node that can be used by user for storing specific information in the node.
Definition: kdtree.py:118
def __create_point_comparator(self, type_point)
Create point comparator.
Definition: kdtree.py:391
def find_nearest_dist_nodes(self, point, distance)
Find neighbors that are located in area that is covered by specified distance.
Definition: kdtree.py:502
def insert(self, point, payload)
Insert new point with payload to kd-tree.
Definition: kdtree.py:204
def __init__(self, data=None, payload=None, left=None, right=None, disc=None, parent=None)
Definition: kdtree.py:101
data
Data point that is presented as list of coodinates.
Definition: kdtree.py:115
def visualize(self, display=True)
Display KD-tree to console.
Definition: kdtree.py:52