d9/de6/dbscan_8py_source.html

 """!

 @brief Cluster analysis algorithm: DBSCAN.
 @details Implementation based on paper @cite inproceedings::dbscan::1.

 @authors Andrei Novikov (pyclustering@yandex.ru)
 @date 2014-2019
 @copyright GNU Public License

 @cond GNU_PUBLIC_LICENSE
     PyClustering is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
     the Free Software Foundation, either version 3 of the License, or
     (at your option) any later version.

     PyClustering is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     GNU General Public License for more details.

     You should have received a copy of the GNU General Public License
     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 @endcond

 """


 from pyclustering.container.kdtree import kdtree

 from pyclustering.cluster.encoder import type_encoding

 from pyclustering.core.wrapper import ccore_library

 import pyclustering.core.dbscan_wrapper as wrapper


 class dbscan:
     """!
     @brief Class represents clustering algorithm DBSCAN.
     @details The pure-Python implementation uses a KD-tree for neighbor search when the input is a list of points,
               and direct row scans when the input is a distance matrix.

              CCORE option can be used to use the pyclustering core - C/C++ shared library for processing that significantly increases performance.

     Example:
     @code
         from pyclustering.cluster.dbscan import dbscan
         from pyclustering.cluster import cluster_visualizer
         from pyclustering.utils import read_sample
         from pyclustering.samples.definitions import FCPS_SAMPLES

         # Sample for cluster analysis.
         sample = read_sample(FCPS_SAMPLES.SAMPLE_CHAINLINK)

         # Create DBSCAN algorithm.
         dbscan_instance = dbscan(sample, 0.7, 3)

         # Start processing by DBSCAN.
         dbscan_instance.process()

         # Obtain results of clustering.
         clusters = dbscan_instance.get_clusters()
         noise = dbscan_instance.get_noise()

         # Visualize clustering results
         visualizer = cluster_visualizer()
         visualizer.append_clusters(clusters, sample)
         visualizer.append_cluster(noise, sample, marker='x')
         visualizer.show()
     @endcode

     """

     def __init__(self, data, eps, neighbors, ccore = True, **kwargs):
         """!
         @brief Constructor of clustering algorithm DBSCAN.

         @param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple.
         @param[in] eps (double): Connectivity radius between points, points may be connected if distance between them less than the radius.
         @param[in] neighbors (uint): Minimum number of neighbors inside the radius that a point must have to start or extend a cluster.
         @param[in] ccore (bool): if True then DLL CCORE (C++ solution) will be used for solving the problem (only when the library reports itself as workable).
         @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'data_type').

         <b>Keyword Args:</b><br>
             - data_type (string): Data type of input sample 'data' that is processed by the algorithm ('points', 'distance_matrix').

         @exception TypeError if 'data_type' is neither 'points' nor 'distance_matrix'.

         """

         self.__pointer_data = data
         self.__kdtree = None
         self.__eps = eps
         # NOTE: despite its name this stores the squared radius; it is not read anywhere in this class.
         self.__sqrt_eps = eps * eps
         self.__neighbors = neighbors

         # Per-point flags: 'visited' - neighborhood already examined, 'belong' - already assigned to a cluster.
         self.__visited = [False] * len(self.__pointer_data)
         self.__belong = [False] * len(self.__pointer_data)

         self.__data_type = kwargs.get('data_type', 'points')

         self.__clusters = []
         self.__noise = []

         # Raises TypeError for an unknown data type, so invalid input is rejected at construction time.
         self.__neighbor_searcher = self.__create_neighbor_searcher(self.__data_type)

         self.__ccore = ccore
         if self.__ccore:
             # Fall back to the Python implementation when the shared library is not usable.
             self.__ccore = ccore_library.workable()


     def process(self):
         """!
         @brief Performs cluster analysis in line with rules of DBSCAN algorithm.
         @details Results are rebuilt from scratch on every call, so calling the method repeatedly does not
                   accumulate duplicated clusters or noise.

         @see get_clusters()
         @see get_noise()

         """

         if self.__ccore is True:
             (self.__clusters, self.__noise) = wrapper.dbscan(self.__pointer_data, self.__eps, self.__neighbors, self.__data_type)
             return

         amount_points = len(self.__pointer_data)

         # Reset the per-run state: without this a second call would keep stale flags and
         # append every noise index to the previous noise list once more.
         self.__visited = [False] * amount_points
         self.__belong = [False] * amount_points
         self.__clusters = []
         self.__noise = []

         if self.__data_type == 'points':
             self.__kdtree = kdtree(self.__pointer_data, range(len(self.__pointer_data)))

         for index_point in range(amount_points):
             if not self.__visited[index_point]:
                 cluster = self.__expand_cluster(index_point)
                 if cluster is not None:
                     self.__clusters.append(cluster)

         # Every point that has not been absorbed by any cluster is noise.
         self.__noise = [index_point for index_point in range(amount_points) if not self.__belong[index_point]]


     def get_clusters(self):
         """!
         @brief Returns allocated clusters.

         @remark Allocated clusters can be returned only after data processing (use method process()). Otherwise empty list is returned.

         @return (list) List of allocated clusters, each cluster contains indexes of objects in list of data.

         @see process()
         @see get_noise()

         """

         return self.__clusters


     def get_noise(self):
         """!
         @brief Returns allocated noise.

         @remark Allocated noise can be returned only after data processing (use method process() before). Otherwise empty list is returned.

         @return (list) List of indexes that are marked as a noise.

         @see process()
         @see get_clusters()

         """

         return self.__noise


     def get_cluster_encoding(self):
         """!
         @brief Returns clustering result representation type that indicate how clusters are encoded.

         @return (type_encoding) Clustering result representation.

         @see get_clusters()

         """

         return type_encoding.CLUSTER_INDEX_LIST_SEPARATION


     def __create_neighbor_searcher(self, data_type):
         """!
         @brief Returns neighbor searcher in line with data type.

         @param[in] data_type (string): Data type (points or distance matrix).

         @return (callable) Bound method that takes an index of a point and returns list of indexes of its neighbors.

         @exception TypeError if the data type is not supported.

         """
         if data_type == 'points':
             return self.__neighbor_indexes_points
         elif data_type == 'distance_matrix':
             return self.__neighbor_indexes_distance_matrix
         else:
             raise TypeError("Unknown type of data is specified '%s'" % data_type)


     def __expand_cluster(self, index_point):
         """!
         @brief Expands cluster from specified point in the input data space.

         @param[in] index_point (uint): Index of a point from the data.

         @return (list) List of indexes of points that belong to the same cluster (the specified point goes first),
                  or None if the point does not have enough neighbors to start a cluster.

         """

         self.__visited[index_point] = True
         neighbors = self.__neighbor_searcher(index_point)

         if len(neighbors) < self.__neighbors:
             return None

         cluster = [index_point]
         self.__belong[index_point] = True

         # Companion set of everything that is already queued (plus the seed point itself) to make
         # the membership test O(1) instead of scanning the growing list for every candidate.
         queued = set(neighbors)
         queued.add(index_point)

         # 'neighbors' is used as a work queue: it is intentionally extended while being iterated.
         for index_neighbor in neighbors:
             if not self.__visited[index_neighbor]:
                 self.__visited[index_neighbor] = True

                 next_neighbors = self.__neighbor_searcher(index_neighbor)

                 # Only points with enough neighbors of their own propagate the cluster further.
                 if len(next_neighbors) >= self.__neighbors:
                     for index_candidate in next_neighbors:
                         if index_candidate not in queued:
                             queued.add(index_candidate)
                             neighbors.append(index_candidate)

             if not self.__belong[index_neighbor]:
                 cluster.append(index_neighbor)
                 self.__belong[index_neighbor] = True

         return cluster


     def __neighbor_indexes_points(self, index_point):
         """!
         @brief Return neighbors of the specified object in case of sequence of points.
         @details The KD-tree built by process() is queried, therefore the method must not be called before it.

         @param[in] index_point (uint): Index of the point whose neighbors should be found.

         @return (list) List of indexes of neighbors in line with the connectivity radius (the point itself is excluded).

         """
         kdnodes = self.__kdtree.find_nearest_dist_nodes(self.__pointer_data[index_point], self.__eps)
         # Each found item carries the KD-tree node at position 1; its payload is the index of the point.
         return [node_tuple[1].payload for node_tuple in kdnodes if node_tuple[1].payload != index_point]


     def __neighbor_indexes_distance_matrix(self, index_point):
         """!
         @brief Return neighbors of the specified object in case of distance matrix.

         @param[in] index_point (uint): Index of the point whose neighbors should be found.

         @return (list) List of indexes of neighbors in line with the connectivity radius (the point itself is excluded).

         """
         distances = self.__pointer_data[index_point]
         return [index_neighbor for index_neighbor in range(len(distances))
                 if ((distances[index_neighbor] <= self.__eps) and (index_neighbor != index_point))]
pyclustering.cluster.dbscan.dbscan.__neighbor_searcher
__neighbor_searcher
Definition: dbscan.py:102

pyclustering.cluster.dbscan.dbscan.__belong
__belong
Definition: dbscan.py:95

pyclustering.cluster.dbscan.dbscan
Class represents clustering algorithm DBSCAN.
Definition: dbscan.py:37

pyclustering.cluster.dbscan.dbscan.get_clusters
def get_clusters(self)
Returns allocated clusters.
Definition: dbscan.py:136

pyclustering.cluster.dbscan.dbscan.__sqrt_eps
__sqrt_eps
Definition: dbscan.py:91

pyclustering.cluster.encoder
Module for representing clustering results.
Definition: encoder.py:1

pyclustering.container.kdtree.kdtree
Represents KD Tree that is a space-partitioning data structure for organizing points in a k-dimension...
Definition: kdtree.py:157

pyclustering.cluster.dbscan.dbscan.__init__
def __init__(self, data, eps, neighbors, ccore=True, **kwargs)
Constructor of clustering algorithm DBSCAN.
Definition: dbscan.py:73

pyclustering.cluster.dbscan.dbscan.process
def process(self)
Performs cluster analysis in line with rules of DBSCAN algorithm.
Definition: dbscan.py:109

pyclustering.cluster.dbscan.dbscan.__ccore
__ccore
Definition: dbscan.py:104

pyclustering.cluster.dbscan.dbscan.get_cluster_encoding
def get_cluster_encoding(self)
Returns clustering result representation type that indicate how clusters are encoded.
Definition: dbscan.py:168

pyclustering.cluster.dbscan.dbscan.__expand_cluster
def __expand_cluster(self, index_point)
Expands cluster from specified point in the input data space.
Definition: dbscan.py:196

pyclustering.cluster.dbscan.dbscan.__eps
__eps
Definition: dbscan.py:90

pyclustering.cluster.dbscan.dbscan.__neighbor_indexes_points
def __neighbor_indexes_points(self, index_point)
Return neighbors of the specified object in case of sequence of points.
Definition: dbscan.py:231

pyclustering.cluster.dbscan.dbscan.__kdtree
__kdtree
Definition: dbscan.py:89

pyclustering.cluster.dbscan.dbscan.__create_neighbor_searcher
def __create_neighbor_searcher(self, data_type)
Returns neighbor searcher in line with data type.
Definition: dbscan.py:181

pyclustering.cluster.dbscan.dbscan.__noise
__noise
Definition: dbscan.py:100

pyclustering.cluster.dbscan.dbscan.__clusters
__clusters
Definition: dbscan.py:99

pyclustering.cluster.dbscan.dbscan.__neighbors
__neighbors
Definition: dbscan.py:92

pyclustering.cluster.dbscan.dbscan.__data_type
__data_type
Definition: dbscan.py:97

pyclustering.cluster.dbscan.dbscan.__visited
__visited
Definition: dbscan.py:94

pyclustering.cluster.dbscan.dbscan.__pointer_data
__pointer_data
Definition: dbscan.py:88

pyclustering.cluster.dbscan.dbscan.get_noise
def get_noise(self)
Returns allocated noise.
Definition: dbscan.py:152

pyclustering.cluster.dbscan.dbscan.__neighbor_indexes_distance_matrix
def __neighbor_indexes_distance_matrix(self, index_point)
Return neighbors of the specified object in case of distance matrix.
Definition: dbscan.py:244

pyclustering.container.kdtree
Data Structure: KD-Tree.
Definition: kdtree.py:1