3 @brief Cluster analysis algorithm: DBSCAN. 4 @details Implementation based on paper @cite inproceedings::dbscan::1. 6 @authors Andrei Novikov (pyclustering@yandex.ru) 8 @copyright GNU Public License 10 @cond GNU_PUBLIC_LICENSE 11 PyClustering is free software: you can redistribute it and/or modify 12 it under the terms of the GNU General Public License as published by 13 the Free Software Foundation, either version 3 of the License, or 14 (at your option) any later version. 16 PyClustering is distributed in the hope that it will be useful, 17 but WITHOUT ANY WARRANTY; without even the implied warranty of 18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 GNU General Public License for more details. 21 You should have received a copy of the GNU General Public License 22 along with this program. If not, see <http://www.gnu.org/licenses/>. 32 from pyclustering.core.wrapper
import ccore_library
34 import pyclustering.core.dbscan_wrapper
as wrapper
39 @brief Class represents clustering algorithm DBSCAN. 40 @details This DBSCAN algorithm is KD-tree optimized. 42 CCORE option can be used to use the pyclustering core - C/C++ shared library for processing that significantly increases performance. 46 from pyclustering.cluster.dbscan import dbscan 47 from pyclustering.cluster import cluster_visualizer 48 from pyclustering.utils import read_sample 49 from pyclustering.samples.definitions import FCPS_SAMPLES 51 # Sample for cluster analysis. 52 sample = read_sample(FCPS_SAMPLES.SAMPLE_CHAINLINK) 54 # Create DBSCAN algorithm. 55 dbscan_instance = dbscan(sample, 0.7, 3) 57 # Start processing by DBSCAN. 58 dbscan_instance.process() 60 # Obtain results of clustering. 61 clusters = dbscan_instance.get_clusters() 62 noise = dbscan_instance.get_noise() 64 # Visualize clustering results 65 visualizer = cluster_visualizer() 66 visualizer.append_clusters(clusters, sample) 67 visualizer.append_cluster(noise, sample, marker='x') 73 def __init__(self, data, eps, neighbors, ccore=True, **kwargs):
75 @brief Constructor of clustering algorithm DBSCAN. 77 @param[in] data (list): Input data that is presented as list of points or distance matrix (defined by parameter 78 'data_type', by default data is considered as a list of points). 79 @param[in] eps (double): Connectivity radius between points, points may be connected if distance between them less than the radius. 80 @param[in] neighbors (uint): minimum number of shared neighbors that is required to establish links between points. 81 @param[in] ccore (bool): if True then DLL CCORE (C++ solution) will be used for solving the problem. 82 @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'data_type'). 84 <b>Keyword Args:</b><br> 85 - data_type (string): Data type of input sample 'data' that is processed by the algorithm ('points', 'distance_matrix'). 98 self.
__data_type = kwargs.get(
'data_type',
'points')
107 self.
__ccore = ccore_library.workable()
114 @brief Performs cluster analysis in line with rules of DBSCAN algorithm. 116 @return (dbscan) Returns itself (DBSCAN instance). 133 if cluster
is not None:
145 @brief Returns allocated clusters. 147 @remark Allocated clusters can be returned only after data processing (use method process()). Otherwise an empty list is returned. 149 @return (list) List of allocated clusters, each cluster contains indexes of objects in list of data. 161 @brief Returns allocated noise. 163 @remark Allocated noise can be returned only after data processing (use method process() before). Otherwise an empty list is returned. 165 @return (list) List of indexes that are marked as noise. 177 @brief Returns clustering result representation type that indicates how clusters are encoded. 179 @return (type_encoding) Clustering result representation. 185 return type_encoding.CLUSTER_INDEX_LIST_SEPARATION
188 def __verify_arguments(self):
190 @brief Verify input parameters for the algorithm and throw exception in case of incorrectness. 194 raise ValueError(
"Input data is empty (size: '%d')." % len(self.
__pointer_data))
197 raise ValueError(
"Connectivity radius (current value: '%d') should be greater or equal to 0." % self.
__eps)
200 def __create_neighbor_searcher(self, data_type):
202 @brief Returns neighbor searcher in line with data type. 204 @param[in] data_type (string): Data type (points or distance matrix). 207 if data_type ==
'points':
209 elif data_type ==
'distance_matrix':
212 raise TypeError(
"Unknown type of data is specified '%s'" % data_type)
215 def __expand_cluster(self, index_point):
217 @brief Expands cluster from specified point in the input data space. 219 @param[in] index_point (list): Index of a point from the data. 221 @return (list) Return tuple of list of indexes that belong to the same cluster and list of points that are marked as noise: (cluster, noise), or None if nothing has been expanded. 230 cluster = [index_point]
241 neighbors += [k
for k
in next_neighbors
if ( (k
in neighbors) ==
False)
and k != index_point]
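250 def __neighbor_indexes_points(self, index_point):
252 @brief Return neighbors of the specified object in case of sequence of points. 254 @param[in] index_point (uint): Index of the point whose neighbors should be found. 256 @return (list) List of indexes of neighbors in line with the connectivity radius. 260 return [node_tuple[1].payload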
252 @brief Return neighbors of the specified object in case of sequence of points. 254 @param[in] index_point (uint): Index point whose neighbors are should be found. 256 @return (list) List of indexes of neighbors in line the connectivity radius. 260 return [node_tuple[1].payload
for node_tuple
in kdnodes
if node_tuple[1].payload != index_point]
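263 def __neighbor_indexes_distance_matrix(self, index_point):
265 @brief Return neighbors of the specified object in case of distance matrix. 267 @param[in] index_point (uint): Index of the point whose neighbors should be found. 269 @return (list) List of indexes of neighbors in line with the connectivity radius. 273 return [index_neighbor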
265 @brief Return neighbors of the specified object in case of distance matrix. 267 @param[in] index_point (uint): Index point whose neighbors are should be found. 269 @return (list) List of indexes of neighbors in line the connectivity radius. 273 return [index_neighbor
for index_neighbor
in range(len(distances))
274 if ((distances[index_neighbor] <= self.
__eps)
and (index_neighbor != index_point))]
Class represents clustering algorithm DBSCAN.
def get_clusters(self)
Returns allocated clusters.
Module for representing clustering results.
def __init__(self, data, eps, neighbors, ccore=True, **kwargs)
Constructor of clustering algorithm DBSCAN.
def process(self)
Performs cluster analysis in line with rules of DBSCAN algorithm.
def get_cluster_encoding(self)
Returns clustering result representation type that indicates how clusters are encoded.
def __expand_cluster(self, index_point)
Expands cluster from specified point in the input data space.
Represents balanced static KD-tree that does not provide services to add and remove nodes after initi...
def __neighbor_indexes_points(self, index_point)
Return neighbors of the specified object in case of sequence of points.
def __create_neighbor_searcher(self, data_type)
Returns neighbor searcher in line with data type.
def __verify_arguments(self)
Verify input parameters for the algorithm and throw exception in case of incorrectness.
def get_noise(self)
Returns allocated noise.
def __neighbor_indexes_distance_matrix(self, index_point)
Return neighbors of the specified object in case of distance matrix.