"""!

@brief Data Structure: KD-Tree
@details Implementation based on paper @cite book::the_design_and_analysis.

@authors Andrei Novikov (pyclustering@yandex.ru)
@copyright GNU Public License

@cond GNU_PUBLIC_LICENSE
    PyClustering is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    PyClustering is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program. If not, see <http://www.gnu.org/licenses/>.
@endcond

"""


import warnings

import numpy

try:
    import matplotlib.pyplot as plt
except Exception as error_instance:
    warnings.warn("Impossible to import matplotlib (please, install 'matplotlib'), pyclustering's visualization "
                  "functionality is not available (details: '%s')." % str(error_instance))

from pyclustering.utils import euclidean_distance_square, find_left_element
class kdtree_visualizer:
    """!
    @brief KD-tree visualizer that provides a service to display a graphical representation of the tree using
            the `matplotlib` library.
    @details The visualizer is able to visualize 2D KD-trees only.

    Here is an example of how to visualize a balanced KD-tree for the `TwoDiamonds` sample using `kdtree_visualizer`:
    @code
        from pyclustering.container.kdtree import kdtree_balanced, kdtree_visualizer
        from pyclustering.utils import read_sample
        from pyclustering.samples.definitions import FCPS_SAMPLES

        sample = read_sample(FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS)
        tree_instance = kdtree_balanced(sample)

        kdtree_visualizer(tree_instance).visualize()
    @endcode

    Output result of the example above (balanced tree) - figure 1:
    @image html kd_tree_unbalanced_two_diamonds.png "Fig. 1. Balanced KD-tree for sample 'TwoDiamonds'."

    One more example demonstrates an unbalanced KD-tree. The `kdtree` class is a child class of `kdtree_balanced`
    that allows points to be added step by step, so an unbalanced KD-tree can be built:
    @code
        from pyclustering.container.kdtree import kdtree, kdtree_visualizer
        from pyclustering.utils import read_sample
        from pyclustering.samples.definitions import FCPS_SAMPLES

        sample = read_sample(FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS)
        tree_instance = kdtree()  # Do not use the sample in the constructor to avoid building a balanced tree.

        # Fill the KD-tree step by step to obtain an unbalanced tree.
        for point in sample:
            tree_instance.insert(point)

        kdtree_visualizer(tree_instance).visualize()
    @endcode

    Output result of the example above (unbalanced tree) - figure 2:
    @image html kd_tree_unbalanced_two_diamonds.png "Fig. 2. Unbalanced KD-tree for sample 'TwoDiamonds'."

    """

    def __init__(self, kdtree_instance):
        """!
        @brief Initialize KD-tree visualizer.

        @param[in] kdtree_instance (kdtree): Instance of a KD-tree that should be visualized.

        """
        self.__tree = kdtree_instance
        self.__colors = ['blue', 'red', 'green']

        self.__verify()
    def visualize(self):
        """!
        @brief Visualize KD-tree using a plot in 2-dimensional data-space.

        """
        node = self.__tree.get_root()
        min, max = self.__get_limits()

        figure = plt.figure(111)
        ax = figure.add_subplot(111)

        self.__draw_node(ax, node, min, max)

        ax.set_xlim(min[0], max[0])
        ax.set_ylim(min[1], max[1])
        plt.show()
    def __draw_node(self, ax, node, min, max):
        self.__draw_split_line(ax, node, min, max)

        if node.left is not None:
            rborder = max[:]
            rborder[node.disc] = node.data[node.disc]
            self.__draw_node(ax, node.left, min, rborder)

        if node.right is not None:
            lborder = min[:]
            lborder[node.disc] = node.data[node.disc]
            self.__draw_node(ax, node.right, lborder, max)
    def __draw_split_line(self, ax, node, min, max):
        max_coord = max[:]
        min_coord = min[:]

        dimension = len(min)
        for d in range(dimension):
            if d == node.disc:
                max_coord[d] = node.data[d]
                min_coord[d] = node.data[d]

        ax.plot(node.data[0], node.data[1], color='black', marker='.', markersize=6)
        ax.plot([min_coord[0], max_coord[0]], [min_coord[1], max_coord[1]], color=self.__colors[node.disc],
                linestyle='-', linewidth=1)
    def __get_limits(self):
        dimension = len(self.__tree.get_root().data)
        nodes = self.__get_all_nodes()

        max, min = [float('-inf')] * dimension, [float('+inf')] * dimension

        for node in nodes:
            for d in range(dimension):
                if max[d] < node.data[d]:
                    max[d] = node.data[d]

                if min[d] > node.data[d]:
                    min[d] = node.data[d]

        return min, max
    def __get_all_nodes(self):
        nodes = []

        next_level = [self.__tree.get_root()]

        while len(next_level) != 0:
            cur_level = next_level

            nodes += cur_level
            next_level = []

            for cur_node in cur_level:
                children = cur_node.get_children()
                if children is not None:
                    next_level += children

        return nodes
    def __verify(self):
        root = self.__tree.get_root()
        if root is None:
            raise ValueError("KD-Tree is empty - nothing to visualize.")

        dimension = len(root.data)
        if dimension != 2:
            raise NotImplementedError("KD-Tree data has '%d' dimension - only KD-tree with 2D data can be "
                                      "visualized." % dimension)


class node:
    """!
    @brief Represents a node in a KD-Tree.
    @details The KD-Tree node contains the point's coordinates, a discriminator, a payload and pointers to the
              parent and children.

    """

    def __init__(self, data=None, payload=None, left=None, right=None, disc=None, parent=None):
        """!
        @brief Creates KD-tree node.

        @param[in] data (list): Data point that is presented as a list of coordinates.
        @param[in] payload (any): Payload of the node (pointer to an essence that is attached to this node).
        @param[in] left (node): Node of the KD-Tree that represents the left successor.
        @param[in] right (node): Node of the KD-Tree that represents the right successor.
        @param[in] disc (uint): Index of the dimension of that node.
        @param[in] parent (node): Node of the KD-Tree that represents the parent.

        """

        ## Data point that is presented as a list of coordinates.
        self.data = data

        ## Payload of the node that can be used by the user for storing specific information in the node.
        self.payload = payload

        ## Left node successor of the node.
        self.left = left

        ## Right node successor of the node.
        self.right = right

        ## Index of the dimension.
        self.disc = disc

        ## Parent node of the node.
        self.parent = parent

    def __repr__(self):
        """!
        @return (string) Default representation of the node.

        """
        left = None
        right = None

        if self.left is not None:
            left = self.left.data

        if self.right is not None:
            right = self.right.data

        return "(%s: [L:'%s', R:'%s'])" % (self.data, left, right)
    def __str__(self):
        """!
        @return (string) String representation of the node.

        """
        return self.__repr__()

    def get_children(self):
        """!
        @brief Returns the non-`None` children of the node.

        @return (generator) Non-`None` children of the node; if the node does not have children
                 then nothing is yielded.

        """

        if self.left is not None:
            yield self.left
        if self.right is not None:
            yield self.right
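The following is a minimal sketch, not part of the library, that shows how `node` records link together and how
`get_children` and the string representation behave. The coordinates and payload values are made up purely for
illustration.

    # Hypothetical values - only to illustrate the `node` record structure.
    leaf = node(data=[1.0, 2.0], payload="left-leaf", disc=1)
    root = node(data=[3.0, 4.0], payload="root", left=leaf, right=None, disc=0)
    leaf.parent = root

    print(root)                       # ([3.0, 4.0]: [L:'[1.0, 2.0]', R:'None'])
    print(list(root.get_children()))  # [([1.0, 2.0]: [L:'None', R:'None'])]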
class kdtree_balanced:
    """!
    @brief Represents a balanced static KD-tree that does not provide services to add and remove nodes after
            initialization.
    @details In the term KD-tree, k denotes the dimensionality of the space being represented. Each data point is
              represented as a node in the k-d tree in the form of a record of type node.

    Here is an example of how to create a KD-tree:
    @code
        from pyclustering.container.kdtree import kdtree_balanced, kdtree_visualizer
        from pyclustering.utils import read_sample
        from pyclustering.samples.definitions import FCPS_SAMPLES

        sample = read_sample(FCPS_SAMPLES.SAMPLE_LSUN)
        tree_instance = kdtree_balanced(sample)

        kdtree_visualizer(tree_instance).visualize()
    @endcode

    Output result of the example above - figure 1.
    @image html kd_tree_balanced_lsun.png "Fig. 1. Balanced KD-tree for sample 'Lsun'."

    """

    def __init__(self, points, payloads=None):
        """!
        @brief Initializes balanced static KD-tree.

        @param[in] points (array_like): Points that should be used to build the KD-tree.
        @param[in] payloads (array_like): Payload of each point in `points`.

        """

        if points is None:
            self._length = 0
            self._dimension = 0
            self._point_comparator = None
            self._root = None
            return

        self._length = len(points)
        self._dimension = len(points[0])
        self._point_comparator = self._create_point_comparator(type(points[0]))

        nodes = []
        for i in range(len(points)):
            payload = None
            if payloads is not None:
                payload = payloads[i]

            nodes.append(node(points[i], payload, None, None, -1, None))

        self._root = self.__create_tree(nodes, None, 0)
    def __len__(self):
        """!
        @brief Returns the number of nodes in the KD-tree.

        @return (uint) Number of nodes in the KD-tree.

        """
        return self._length

    def get_root(self):
        """!
        @brief Returns the root of the tree.

        @return (node) The root of the tree.

        """
        return self._root

    def __create_tree(self, nodes, parent, depth):
        """!
        @brief Creates a balanced sub-tree using elements from the list `nodes`.

        @param[in] nodes (list): List of KD-tree nodes.
        @param[in] parent (node): Parent node that is used as a root to build the sub-tree.
        @param[in] depth (uint): Depth of the tree where children of the `parent` should be placed.

        @return (node) A node that is the root of the built sub-tree.

        """
        if len(nodes) == 0:
            return None

        discriminator = depth % self._dimension

        nodes.sort(key=lambda n: n.data[discriminator])
        median = len(nodes) // 2

        # Values may be non-unique - take the leftmost element among equal medians.
        median = find_left_element(nodes, median, lambda n1, n2: n1.data[discriminator] < n2.data[discriminator])

        new_node = nodes[median]
        new_node.disc = discriminator
        new_node.parent = parent
        new_node.left = self.__create_tree(nodes[:median], new_node, depth + 1)
        new_node.right = self.__create_tree(nodes[median + 1:], new_node, depth + 1)

        return new_node
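The balanced construction above repeatedly splits the point set at the median of the current discriminator. Below
is a standalone sketch of the same median-split idea on plain 2D point lists; it is not library code, and the
helper name `build_kdtree` and the dictionary representation are only illustrative assumptions.

    # Hypothetical helper - a simplified illustration of median splitting, assuming 2D points.
    def build_kdtree(points, depth=0):
        if not points:
            return None

        axis = depth % 2                                  # alternate between x and y
        ordered = sorted(points, key=lambda p: p[axis])
        median = len(ordered) // 2

        return {"point": ordered[median],
                "left": build_kdtree(ordered[:median], depth + 1),
                "right": build_kdtree(ordered[median + 1:], depth + 1)}

    tree = build_kdtree([[2, 3], [5, 4], [9, 6], [4, 7], [8, 1], [7, 2]])
    print(tree["point"])  # [7, 2] - the root splits on the x coordinate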
    def _create_point_comparator(self, type_point):
        """!
        @brief Creates a point comparator.
        @details In case of numpy.ndarray a specific comparator is required.

        @param[in] type_point (data_type): Type of point that is stored in a KD-node.

        @return (callable) Callable point comparator to compare two points.

        """
        if type_point == numpy.ndarray:
            return lambda obj1, obj2: numpy.array_equal(obj1, obj2)

        return lambda obj1, obj2: obj1 == obj2
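A short illustration, separate from the library code, of why the numpy-specific comparator is needed: `==` on
numpy arrays returns an element-wise array rather than a single boolean, so `numpy.array_equal` is used instead.
The values below are arbitrary.

    import numpy

    a = numpy.array([1.0, 2.0])
    b = numpy.array([1.0, 2.0])

    print(a == b)                    # [ True  True] - element-wise result, not a single boolean
    print(numpy.array_equal(a, b))   # True - what the comparator actually needs
    print([1.0, 2.0] == [1.0, 2.0])  # True - plain lists already compare to a single boolean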
    def _find_node_by_rule(self, point, search_rule, cur_node):
        """!
        @brief Searches for a node that satisfies the given search rule.
        @details If a node with the specified parameters does not exist then `None` is returned,
                  otherwise the required node is returned.

        @param[in] point (list): Coordinates of the point whose node should be found.
        @param[in] search_rule (lambda): Rule that is called to check whether a node satisfies the search parameters.
        @param[in] cur_node (node): Node from which the search should be started.

        @return (node) Node if it satisfies the input parameters, otherwise `None`.

        """

        if cur_node is None:
            cur_node = self._root

        while cur_node:
            if cur_node.data[cur_node.disc] <= point[cur_node.disc]:
                if search_rule(cur_node):
                    return cur_node

                cur_node = cur_node.right

            else:
                cur_node = cur_node.left

        return None
    def find_node_with_payload(self, point, point_payload, cur_node=None):
        """!
        @brief Finds a node with the specified coordinates and payload.
        @details If a node with the specified parameters does not exist then `None` is returned,
                  otherwise the required node is returned.

        @param[in] point (list): Coordinates of the point whose node should be found.
        @param[in] point_payload (any): Payload of the node that is searched in the tree.
        @param[in] cur_node (node): Node from which the search should be started.

        @return (node) Node if it satisfies the input parameters, otherwise `None`.

        """

        rule_search = lambda node, point=point, payload=point_payload: self._point_comparator(node.data, point) and \
                                                                       node.payload == payload
        return self._find_node_by_rule(point, rule_search, cur_node)
    def find_node(self, point, cur_node=None):
        """!
        @brief Finds a node with coordinates that are defined by the specified point.
        @details If a node with the specified parameters does not exist then `None` is returned,
                  otherwise the required node is returned.

        @param[in] point (list): Coordinates of the point whose node should be found.
        @param[in] cur_node (node): Node from which the search should be started.

        @return (node) Node if it satisfies the input parameters, otherwise `None`.

        """

        rule_search = lambda node, point=point: self._point_comparator(node.data, point)
        return self._find_node_by_rule(point, rule_search, cur_node)

    def find_nearest_dist_node(self, point, distance, retdistance=False):
        """!
        @brief Finds the nearest neighbor in the area with radius = distance.

        @param[in] point (list): Coordinates of the point for which the nearest neighbor is searched.
        @param[in] distance (double): Maximum distance where neighbors are searched.
        @param[in] retdistance (bool): If True - returns the neighbor together with the distance to it, otherwise
                    only the neighbor is returned.

        @return (node|tuple) Nearest neighbor if `retdistance` is False and a tuple `(distance, node)` if
                 `retdistance` is True, where the first element is the distance to the node and the second element
                 is the node itself.

        """

        best_nodes = self.find_nearest_dist_nodes(point, distance)

        if len(best_nodes) == 0:
            return None

        nearest = min(best_nodes, key=lambda item: item[0])

        if retdistance is True:
            return nearest
        else:
            return nearest[1]
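A brief usage sketch (not taken from the original documentation), assuming the same `SIMPLE_SAMPLES.SAMPLE_SIMPLE3`
sample and query point that appear in the `kdtree` class example further below; it only illustrates the effect of
the `retdistance` argument.

    from pyclustering.container.kdtree import kdtree
    from pyclustering.utils import read_sample
    from pyclustering.samples.definitions import SIMPLE_SAMPLES

    sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)
    tree_instance = kdtree(sample)

    # Only the node is returned.
    nearest = tree_instance.find_nearest_dist_node([1.12, 4.31], 0.3)

    # A tuple is returned whose first element is the distance measure collected by the range search
    # (the squared Euclidean distance) and whose second element is the node.
    nearest_with_distance = tree_instance.find_nearest_dist_node([1.12, 4.31], 0.3, retdistance=True)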
    def find_nearest_dist_nodes(self, point, distance):
        """!
        @brief Finds neighbors that are located in the area that is covered by the specified distance.

        @param[in] point (list): Coordinates that are considered as the center for searching.
        @param[in] distance (double): Distance from the center where searching is performed.

        @return (list) Neighbors in the area that is specified by the point (center) and the distance (radius).

        """

        best_nodes = []
        if self._root is not None:
            self.__recursive_nearest_nodes(point, distance, distance * distance, self._root, best_nodes)

        return best_nodes
    def __recursive_nearest_nodes(self, point, distance, sqrt_distance, node_head, best_nodes):
        """!
        @brief Collects neighbors as tuples (distance, node) that are located in the area that is covered by the
                specified distance.

        @param[in] point (list): Coordinates that are considered as the center for searching.
        @param[in] distance (double): Distance from the center where searching is performed.
        @param[in] sqrt_distance (double): Squared distance from the center where searching is performed.
        @param[in] node_head (node): Node from which searching is performed.
        @param[in|out] best_nodes (list): List of found nodes.

        """

        if node_head.right is not None:
            minimum = node_head.data[node_head.disc] - distance
            if point[node_head.disc] >= minimum:
                self.__recursive_nearest_nodes(point, distance, sqrt_distance, node_head.right, best_nodes)

        if node_head.left is not None:
            maximum = node_head.data[node_head.disc] + distance
            if point[node_head.disc] < maximum:
                self.__recursive_nearest_nodes(point, distance, sqrt_distance, node_head.left, best_nodes)

        candidate_distance = euclidean_distance_square(point, node_head.data)
        if candidate_distance <= sqrt_distance:
            best_nodes.append((candidate_distance, node_head))
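A side note on the squared-distance check above, with a tiny sketch that is not part of the library: comparing the
squared Euclidean distance against `distance * distance` avoids a square root per node while selecting exactly the
same neighbors. The coordinates below are arbitrary.

    import math

    a, b = [1.0, 2.0], [1.5, 2.5]
    distance = 0.8

    squared = (a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2   # what euclidean_distance_square computes
    assert (squared <= distance * distance) == (math.sqrt(squared) <= distance)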
class kdtree(kdtree_balanced):
    """!
    @brief   Represents a KD-tree that is a space-partitioning data structure for organizing points
              in a k-dimensional space.
    @details In the term k-d tree, k denotes the dimensionality of the space being represented. Each data point is
              represented as a node in the k-d tree in the form of a record of type node. The tree supports
              dynamic construction: nodes can be added and removed at any time. As a result, the KD-tree might not
              be balanced if the methods `insert` and `remove` are used to build the tree. If all points are passed
              to the constructor then a balanced tree is built. Single-point search and range-search procedures
              have `O(n)` complexity in the worst case for an unbalanced tree. If there is no need to build a
              dynamic KD-tree, then it is much better to use the static KD-tree `kdtree_balanced`.

    Here is an example of how to use a KD-tree to search for nodes (points from the input data) that are nearest
    to some point:
    @code
        # Import required modules
        from pyclustering.samples.definitions import SIMPLE_SAMPLES
        from pyclustering.container.kdtree import kdtree
        from pyclustering.utils import read_sample

        # Read data from text file
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)

        # Create an instance of the KD-tree and initialize (fill) it with the read data.
        # In this case the tree is balanced.
        tree_instance = kdtree(sample)

        # Search for the nearest point
        search_distance = 0.3
        nearest_node = tree_instance.find_nearest_dist_node([1.12, 4.31], search_distance)

        # Search for the nearest points in radius 0.3
        nearest_nodes = tree_instance.find_nearest_dist_nodes([1.12, 4.31], search_distance)
        print("Nearest nodes:", nearest_nodes)
    @endcode

    If the KD-tree is built using the `insert` and `remove` methods, the resulting KD-tree might be unbalanced -
    here is an example that demonstrates this:
    @code
        from pyclustering.container.kdtree import kdtree, kdtree_visualizer
        from pyclustering.utils import read_sample
        from pyclustering.samples.definitions import FCPS_SAMPLES

        sample = read_sample(FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS)

        # Build the tree using the constructor - a balanced tree is built because the tree knows about all points.
        tree_instance = kdtree(sample)
        kdtree_visualizer(tree_instance).visualize()

        # Build the tree using `insert` only - an unbalanced tree is built.
        tree_instance = kdtree()
        for point in sample:
            tree_instance.insert(point)

        kdtree_visualizer(tree_instance).visualize()
    @endcode

    The two figures below demonstrate the difference between the balanced and unbalanced KD-trees.

    @image html kd_tree_unbalanced_two_diamonds.png "Fig. 1. Balanced KD-tree for sample 'TwoDiamonds'."
    @image html kd_tree_unbalanced_two_diamonds.png "Fig. 2. Unbalanced KD-tree for sample 'TwoDiamonds'."

    """

    def __init__(self, data_list=None, payload_list=None):
        """!
        @brief Create a KD-tree from a list of points and a corresponding list of payloads.
        @details If the lists are not specified then an empty KD-tree is created.

        @param[in] data_list (list): Points that are inserted into the created KD-tree.
        @param[in] payload_list (list): Payloads that are inserted into the created KD-tree; its length should be
                    equal to the length of `data_list` if it is specified.

        """
        super().__init__(data_list, payload_list)
    def insert(self, point, payload=None):
        """!
        @brief Insert a new point with payload into the KD-tree.

        @param[in] point (list): Coordinates of the point of the inserted node.
        @param[in] payload (any-type): Payload of the inserted node. It can be an ID of the node or
                    some useful payload that belongs to the point.

        @return (node) Node that has been inserted into the KD-tree.

        """

        if self._root is None:
            self._dimension = len(point)
            self._root = node(point, payload, None, None, 0)
            self._point_comparator = self._create_point_comparator(type(point))

            self._length += 1
            return self._root

        cur_node = self._root

        while True:
            discriminator = (cur_node.disc + 1) % self._dimension

            if cur_node.data[cur_node.disc] <= point[cur_node.disc]:
                if cur_node.right is None:
                    cur_node.right = node(point, payload, None, None, discriminator, cur_node)

                    self._length += 1
                    return cur_node.right
                else:
                    cur_node = cur_node.right

            else:
                if cur_node.left is None:
                    cur_node.left = node(point, payload, None, None, discriminator, cur_node)

                    self._length += 1
                    return cur_node.left
                else:
                    cur_node = cur_node.left
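A short usage sketch, not taken from the original documentation, showing dynamic insertion with payloads and a
follow-up lookup via `find_node_with_payload`; the point coordinates and payload strings are made up, and the
`len()` call assumes the node counter is kept up to date by `insert`.

    tree_instance = kdtree()

    tree_instance.insert([1.0, 2.0], payload="first")
    tree_instance.insert([1.0, 2.0], payload="second")   # same coordinates, different payload
    tree_instance.insert([3.0, 1.5], payload="third")

    found = tree_instance.find_node_with_payload([1.0, 2.0], "second")
    print(found.payload)        # second
    print(len(tree_instance))   # 3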
    def remove(self, point, **kwargs):
        """!
        @brief Remove the specified point from the KD-tree.
        @details It removes the first found node that satisfies the input parameters. Make sure that
                  the pair (point, payload) is unique for each node, otherwise the first found node is removed.

        @param[in] point (list): Coordinates of the point of the removed node.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'payload').

        <b>Keyword Args:</b><br>
            - payload (any): Payload of the node that should be removed.

        @return (node) Root if the node has been successfully removed, otherwise None.

        """

        if 'payload' in kwargs:
            node_for_remove = self.find_node_with_payload(point, kwargs['payload'], None)
        else:
            node_for_remove = self.find_node(point, None)

        if node_for_remove is None:
            return None

        self._length -= 1

        parent = node_for_remove.parent
        minimal_node = self.__recursive_remove(node_for_remove)

        if parent is None:
            self._root = minimal_node

            # If the whole tree has been destroyed then the root stays None, otherwise the new root has no parent.
            if minimal_node is not None:
                minimal_node.parent = None

        else:
            if parent.left is node_for_remove:
                parent.left = minimal_node
            elif parent.right is node_for_remove:
                parent.right = minimal_node

        return self._root
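A brief sketch, not from the original documentation, of removing points from a dynamically built tree; the
coordinates and payloads are hypothetical and continue the insertion example above, assuming the behaviour
described in the docstrings.

    tree_instance = kdtree()
    tree_instance.insert([1.0, 2.0], payload="first")
    tree_instance.insert([1.0, 2.0], payload="second")

    # Remove the node with the matching payload; without 'payload' the first node found with
    # these coordinates would be removed instead.
    tree_instance.remove([1.0, 2.0], payload="second")

    print(tree_instance.find_node_with_payload([1.0, 2.0], "second"))  # None
    print(tree_instance.find_node([1.0, 2.0]).payload)                 # first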
    def __recursive_remove(self, node_removed):
        """!
        @brief Delete a node and return the root of its subtree.

        @param[in] node_removed (node): Node that should be removed.

        @return (node) Minimal node in line with the coordinate that is defined by the discriminator.

        """

        # Check whether the node is a leaf.
        if (node_removed.right is None) and (node_removed.left is None):
            return None

        discriminator = node_removed.disc

        # If only the left branch exists then move it to the right one.
        if node_removed.right is None:
            node_removed.right = node_removed.left
            node_removed.left = None

        minimal_node = self.__find_minimal_node(node_removed.right, discriminator)
        parent = minimal_node.parent

        if parent.left is minimal_node:
            parent.left = self.__recursive_remove(minimal_node)
        elif parent.right is minimal_node:
            parent.right = self.__recursive_remove(minimal_node)

        minimal_node.parent = node_removed.parent
        minimal_node.disc = node_removed.disc
        minimal_node.right = node_removed.right
        minimal_node.left = node_removed.left

        # Update parent pointers of the successors.
        if minimal_node.right is not None:
            minimal_node.right.parent = minimal_node

        if minimal_node.left is not None:
            minimal_node.left.parent = minimal_node

        return minimal_node
    def __find_minimal_node(self, node_head, discriminator):
        """!
        @brief Find the minimal node in line with the coordinate that is defined by the discriminator.

        @param[in] node_head (node): Node of the KD-tree from which the search should be started.
        @param[in] discriminator (uint): Coordinate number that is used for comparison.

        @return (node) Minimal node in line with the discriminator from the specified node.

        """

        min_key = lambda cur_node: cur_node.data[discriminator]

        stack, candidates = [], []
        is_finished = False

        # Iterative in-order traversal that collects all nodes of the subtree as candidates.
        while is_finished is False:
            if node_head is not None:
                stack.append(node_head)
                node_head = node_head.left

            else:
                if len(stack) != 0:
                    node_head = stack.pop()
                    candidates.append(node_head)
                    node_head = node_head.right
                else:
                    is_finished = True

        return min(candidates, key=min_key)