pyclustering  0.10.1
pyclustering is a Python, C++ data mining library.
generator.py
1 """!
2 
3 @brief Cluster generator.
4 
5 @authors Andrei Novikov (pyclustering@yandex.ru)
6 @date 2014-2020
7 @copyright BSD-3-Clause
8 
9 """
10 
11 
12 import collections.abc
13 import random
14 
15 
class data_generator:
    """!
    @brief Data generator provides services to generate data with clusters with normal distribution.
    @details Every coordinate of every point is drawn independently using `random.gauss`, where the mean is the
              corresponding coordinate of the cluster center and the standard deviation is half of the cluster width.

    """

    def __init__(self, amount_clusters, dimension, cluster_sizes, cluster_centers=None, cluster_width=1.0):
        """!
        @brief Constructs data generator for generating data-sets.

        @param[in] amount_clusters (uint): Amount of clusters that should be generated.
        @param[in] dimension (uint): Dimension of each generated point.
        @param[in] cluster_sizes (uint|array_like): Size of each cluster. In case of 'array_like' input clusters with
                    corresponding sizes are generated.
        @param[in] cluster_centers (array_like): Optional parameter that defines cluster centers (means). When it is
                    not specified, centers are generated automatically using cluster widths.
        @param[in] cluster_width (float|array_like): Optional parameter that defines cluster width. In case of
                    'array_like' input each cluster has own width. Half of the width is used as the standard
                    deviation of generated points.

        """

        self.__amount_clusters = amount_clusters
        self.__dimension = dimension

        self.__cluster_sizes = self.__per_cluster(cluster_sizes, amount_clusters)
        self.__cluster_width = self.__per_cluster(cluster_width, amount_clusters)

        self.__cluster_centers = cluster_centers
        if self.__cluster_centers is None:
            # No explicit centers: derive them from the widths so that clusters are spread apart.
            self.__cluster_centers = self.__generate_cluster_centers(self.__cluster_width)


    @staticmethod
    def __per_cluster(value, amount_clusters):
        """!
        @brief Converts a scalar or an iterable parameter to a list with one value per cluster.

        @param[in] value (scalar|iterable): Parameter value supplied by the user.
        @param[in] amount_clusters (uint): Amount of clusters, used to replicate a scalar value.

        @return (list) Values of the parameter that can be accessed by cluster index.

        """
        if isinstance(value, collections.abc.Iterable):
            # Materialize the input: one-shot iterables (for example, generators) cannot be indexed later.
            return list(value)

        return [value] * amount_clusters


    def generate(self):
        """!
        @brief Generates data in line with generator parameters.

        @return (list) Generated points, grouped by cluster: points of the first cluster come first, then points
                 of the second cluster and so on.

        """
        return [self.__generate_point(index_cluster)
                for index_cluster in range(self.__amount_clusters)
                for _ in range(self.__cluster_sizes[index_cluster])]


    def __generate_point(self, index_cluster):
        """!
        @brief Generates point in line with parameters of specified cluster.

        @param[in] index_cluster (uint): Index of cluster whose parameters are used for point generation.

        @return (list) New generated point in line with normal distribution and cluster parameters.

        """
        center = self.__cluster_centers[index_cluster]
        deviation = self.__cluster_width[index_cluster] / 2.0   # half of the width is the standard deviation

        return [random.gauss(center[index_dimension], deviation) for index_dimension in range(self.__dimension)]


    def __generate_cluster_centers(self, width):
        """!
        @brief Generates centers (means in statistical term) for clusters.
        @details Coordinates of the center with index `i` are drawn around `i * offset`, where the offset is four
                  times the largest cluster width; standard deviation is half of the width of the cluster.

        @param[in] width (list): Width of generated clusters.

        @return (list) Generated centers in line with normal distribution.

        """
        # 'default' keeps the generator usable when there are no clusters at all (empty 'width').
        default_offset = max(width, default=0.0) * 4.0

        return [[random.gauss(i * default_offset, width[i] / 2.0) for _ in range(self.__dimension)]
                for i in range(self.__amount_clusters)]
pyclustering.cluster.generator.data_generator.__dimension
__dimension
Definition: generator.py:37
pyclustering.cluster.generator.data_generator.__init__
def __init__(self, amount_clusters, dimension, cluster_sizes, cluster_centers=None, cluster_width=1.0)
Constructs data generator for generating data-sets.
Definition: generator.py:22
pyclustering.cluster.generator.data_generator
Data generator provides services to generate data with clusters with normal distribution.
Definition: generator.py:16
pyclustering.cluster.generator.data_generator.__cluster_centers
__cluster_centers
Definition: generator.py:47
pyclustering.cluster.generator.data_generator.generate
def generate(self)
Generates data in line with generator parameters.
Definition: generator.py:52
pyclustering.cluster.generator.data_generator.__amount_clusters
__amount_clusters
Definition: generator.py:36
pyclustering.cluster.generator.data_generator.__generate_cluster_centers
def __generate_cluster_centers(self, width)
Generates centers (means in statistical term) for clusters.
Definition: generator.py:81
pyclustering.cluster.generator.data_generator.__cluster_sizes
__cluster_sizes
Definition: generator.py:39
pyclustering.cluster.generator.data_generator.__generate_point
def __generate_point(self, index_cluster)
Generates point in line with parameters of specified cluster.
Definition: generator.py:67
pyclustering.cluster.generator.data_generator.__cluster_width
__cluster_width
Definition: generator.py:43