3 @brief Cluster generator.
5 @authors Andrei Novikov (pyclustering@yandex.ru)
7 @copyright BSD-3-Clause
12 import collections.abc
18 @brief Data generator that produces data-sets whose clusters follow a normal distribution.
22 def __init__(self, amount_clusters, dimension, cluster_sizes, cluster_centers=None, cluster_width=1.0):
24 @brief Constructs data generator for generating data-sets.
26 @param[in] amount_clusters (uint): Amount of clusters that should be generated.
27 @param[in] dimension (uint): Dimension of each generated point.
28 @param[in] cluster_sizes (uint|array_like): Size of each cluster. In case of 'array_like' input, clusters with
29 the corresponding sizes are generated.
30 @param[in] cluster_centers (array_like): Optional parameter that defines cluster centers (means).
31 @param[in] cluster_width (float|array_like): Optional parameter that defines cluster width (standard deviation).
32 In case of 'array_like' input, each cluster has its own standard deviation.
54 @brief Generates data in line with generator parameters.
62 data_points.append(point)
67 def __generate_point(self, index_cluster):
69 @brief Generates a point in line with the parameters of the specified cluster.
71 @param[in] index_cluster (uint): Index of cluster whose parameters are used for point generation.
73 @return (list) Newly generated point in line with normal distribution and cluster parameters.
81 def __generate_cluster_centers(self, width):
83 @brief Generates centers (means in statistical term) for clusters.
85 @param[in] width (list): Width of generated clusters.
87 @return (list) Generated centers in line with normal distribution.
91 default_offset = max(width) * 4.0
93 center = [ random.gauss(i * default_offset, width[i] / 2.0)
for _
in range(self.
__dimension) ]
94 centers.append(center)