generator.py
1 """!
2 
3 @brief Cluster generator.
4 
5 @authors Andrei Novikov (pyclustering@yandex.ru)
6 @date 2014-2020
7 @copyright GNU Public License
8 
9 @cond GNU_PUBLIC_LICENSE
10  PyClustering is free software: you can redistribute it and/or modify
11  it under the terms of the GNU General Public License as published by
12  the Free Software Foundation, either version 3 of the License, or
13  (at your option) any later version.
14 
15  PyClustering is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  GNU General Public License for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with this program. If not, see <http://www.gnu.org/licenses/>.
22 @endcond
23 
24 """
25 
26 
27 import collections
28 import random
29 
30 
32  """!
33  @brief Data generator provides services to generate data with clusters with normal distribution.
34 
35  """
36 
def __init__(self, amount_clusters, dimension, cluster_sizes, cluster_centers=None, cluster_width=1.0):
    """!
    @brief Constructs data generator for generating data-sets.

    @param[in] amount_clusters (uint): Amount of clusters that should be generated.
    @param[in] dimension (uint): Dimension of each generated point.
    @param[in] cluster_sizes (uint|array_like): Size of each cluster. A scalar value is used for every cluster,
                in case of 'array_like' input clusters with corresponding sizes are generated.
    @param[in] cluster_centers (array_like): Optional parameter that defines cluster centers (means). When it is
                not specified the centers are generated using the cluster widths.
    @param[in] cluster_width (float|array_like): Optional parameter that defines cluster width (points are
                generated with standard deviation equal to half of the width). A scalar value is used for every
                cluster, in case of 'array_like' input each cluster has own width.

    """
    # The 'collections.Iterable' alias was removed in Python 3.10; the ABC is provided by 'collections.abc'.
    from collections.abc import Iterable

    self.__amount_clusters = amount_clusters
    self.__dimension = dimension

    # Scalar size is expanded to a per-cluster list so that it can be indexed by cluster later.
    self.__cluster_sizes = cluster_sizes
    if not isinstance(self.__cluster_sizes, Iterable):
        self.__cluster_sizes = [self.__cluster_sizes] * amount_clusters

    # Scalar width is expanded in the same way as scalar size.
    self.__cluster_width = cluster_width
    if not isinstance(self.__cluster_width, Iterable):
        self.__cluster_width = [self.__cluster_width] * amount_clusters

    # Centers are optional: when they are not provided they are derived from the (already expanded) widths.
    self.__cluster_centers = cluster_centers
    if self.__cluster_centers is None:
        self.__cluster_centers = self.__generate_cluster_centers(self.__cluster_width)
65 
66 
def generate(self):
    """!
    @brief Generates the data-set described by the generator parameters.
    @details Clusters are processed in order of their indexes and for each of them as many points are generated
              as the size of the cluster requires.

    @return (list) Generated points, the points of one cluster follow each other.

    """
    return [self.__generate_point(cluster_id)
            for cluster_id in range(self.__amount_clusters)
            for _ in range(self.__cluster_sizes[cluster_id])]
80 
81 
def __generate_point(self, index_cluster):
    """!
    @brief Generates a single point that belongs to the specified cluster.
    @details Each coordinate is drawn from normal distribution whose mean is the corresponding coordinate of
              the cluster center and whose standard deviation is half of the cluster width.

    @param[in] index_cluster (uint): Index of cluster whose parameters are used for point generation.

    @return (list) New generated point in line with normal distribution and cluster parameters.

    """
    point = []
    for axis in range(self.__dimension):
        mean = self.__cluster_centers[index_cluster][axis]
        deviation = self.__cluster_width[index_cluster] / 2.0
        point.append(random.gauss(mean, deviation))

    return point
94 
95 
def __generate_cluster_centers(self, width):
    """!
    @brief Generates centers (means in statistical term) for clusters.
    @details Every coordinate of the center of cluster 'i' is drawn from normal distribution with mean
              'i * offset' and standard deviation 'width[i] / 2', where 'offset' is four times the biggest
              width. An empty list is returned when there are no clusters to generate.

    @param[in] width (list): Width of generated clusters.

    @return (list) Generated centers in line with normal distribution.

    """
    # 'default' keeps the call valid for an empty 'width' (no clusters), where plain max() raises ValueError.
    default_offset = max(width, default=0.0) * 4.0

    return [[random.gauss(index * default_offset, width[index] / 2.0) for _ in range(self.__dimension)]
            for index in range(self.__amount_clusters)]
def __generate_cluster_centers(self, width)
Generates centers (means in statistical term) for clusters.
Definition: generator.py:96
def __init__(self, amount_clusters, dimension, cluster_sizes, cluster_centers=None, cluster_width=1.0)
Constructs data generator for generating data-sets.
Definition: generator.py:37
Data generator provides services to generate data with clusters with normal distribution.
Definition: generator.py:31
def generate(self)
Generates data in line with generator parameters.
Definition: generator.py:67
def __generate_point(self, index_cluster)
Generates point in line with parameters of specified cluster.
Definition: generator.py:82